xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 338d3105ec4b0533d515cc05158bb578850515e8)
11c2a3de1SBarry Smith 
2397b6df1SKris Buschelman /*
3c2b5dc30SHong Zhang     Provides an interface to the MUMPS sparse solver
4397b6df1SKris Buschelman */
567602552SJunchao Zhang #include <petscpkg_version.h>
6c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
87ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h>
9397b6df1SKris Buschelman 
10397b6df1SKris Buschelman EXTERN_C_BEGIN
11397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX)
122907cef9SHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
132907cef9SHong Zhang     #include <cmumps_c.h>
142907cef9SHong Zhang   #else
15c6db04a5SJed Brown     #include <zmumps_c.h>
162907cef9SHong Zhang   #endif
172907cef9SHong Zhang #else
182907cef9SHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
192907cef9SHong Zhang     #include <smumps_c.h>
20397b6df1SKris Buschelman   #else
21c6db04a5SJed Brown     #include <dmumps_c.h>
22397b6df1SKris Buschelman   #endif
232907cef9SHong Zhang #endif
24397b6df1SKris Buschelman EXTERN_C_END
/* Job codes used by this interface when calling MUMPS (names follow the phase each one selects).
   Negative values are parenthesized so the macros stay intact inside larger expressions. */
#define JOB_INIT         (-1)
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          (-2)
313d472b54SHong Zhang 
/* MUMPS_c names the MUMPS driver routine that matches PETSc's scalar kind and precision:
   complex/single -> cmumps_c, complex/double -> zmumps_c, real/single -> smumps_c, real/double -> dmumps_c */
#if defined(PETSC_USE_COMPLEX) && defined(PETSC_USE_REAL_SINGLE)
  #define MUMPS_c cmumps_c
#elif defined(PETSC_USE_COMPLEX)
  #define MUMPS_c zmumps_c
#elif defined(PETSC_USE_REAL_SINGLE)
  #define MUMPS_c smumps_c
#else
  #define MUMPS_c dmumps_c
#endif
462907cef9SHong Zhang 
/* MUMPS stores nonzero indices (irn/jcn, irn_loc/jcn_loc) as MUMPS_INT and nonzero counts
   (nnz, nnz_loc) as int64_t. MUMPS_INT is typedef'ed to PetscMUMPSInt so that the name follows
   the convention already used by PetscMPIInt, PetscBLASInt, etc.
*/
typedef MUMPS_INT PetscMUMPSInt;
52a6053eceSJunchao Zhang 
/* Stop the build when MUMPS was compiled in full 64-bit integer mode; which macro reveals that depends on the MUMPS version */
#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if MUMPS is built in full 64-bit mode, therefore this macro is more reliable */
    #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro one uses to build MUMPS in full 64-bit mode */
    #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif
6267602552SJunchao Zhang 
/* MPI datatype and value range of PetscMUMPSInt (a 32-bit int, since full 64-bit MUMPS is rejected above).
   The minimum is spelled (-MAX - 1): the token sequence -2147483648 is unary minus applied to a literal that
   does not fit in int, which gives the constant a wider type than the integer it is meant to bound. */
#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN (-2147483647 - 1)
66a6053eceSJunchao Zhang 
67a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
68d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
69d71ae5a4SJacob Faibussowitsch {
70a6053eceSJunchao Zhang   PetscFunctionBegin;
71ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES)
722c71b3e2SJacob Faibussowitsch   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
73ece88022SPierre Jolivet #endif
74a6053eceSJunchao Zhang   *b = (PetscMUMPSInt)(a);
753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
76a6053eceSJunchao Zhang }
77a6053eceSJunchao Zhang 
78a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */
79d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
80d71ae5a4SJacob Faibussowitsch {
81a6053eceSJunchao Zhang   PetscInt  myval;
82a6053eceSJunchao Zhang   PetscBool myset;
83a6053eceSJunchao Zhang   PetscFunctionBegin;
84a6053eceSJunchao Zhang   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
859566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
869566063dSJacob Faibussowitsch   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
87a6053eceSJunchao Zhang   if (set) *set = myset;
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
89a6053eceSJunchao Zhang }
90a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
91a6053eceSJunchao Zhang 
/* PetscMUMPS_c(mumps) - call the MUMPS driver on (mumps)->id with floating-point traps switched off around the call.

   If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore
   the CPUs the master ranks can run on. All ranks of omp_comm then meet at PetscOmpCtrlBarrier(), after which
   the infog, rinfog, info and rinfo arrays are broadcast in full from root 0 of omp_comm, so every rank of
   omp_comm sees the values produced by the call. See MUMPS-5.1.2 manual p82 for the meaning of these arrays.
   omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK.

   The argument is parenthesized at every use so any pointer-valued expression can be passed. It is evaluated
   several times, so it must not have side effects. */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if ((mumps)->use_petsc_omp_support) { \
        if ((mumps)->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin((mumps)->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&(mumps)->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd((mumps)->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier((mumps)->omp_ctrl)); \
        PetscCallMPI(MPI_Bcast((mumps)->id.infog, PETSC_STATIC_ARRAY_LENGTH((mumps)->id.infog), MPIU_MUMPSINT, 0, (mumps)->omp_comm)); \
        PetscCallMPI(MPI_Bcast((mumps)->id.rinfog, PETSC_STATIC_ARRAY_LENGTH((mumps)->id.rinfog), MPIU_REAL, 0, (mumps)->omp_comm)); \
        PetscCallMPI(MPI_Bcast((mumps)->id.info, PETSC_STATIC_ARRAY_LENGTH((mumps)->id.info), MPIU_MUMPSINT, 0, (mumps)->omp_comm)); \
        PetscCallMPI(MPI_Bcast((mumps)->id.rinfo, PETSC_STATIC_ARRAY_LENGTH((mumps)->id.rinfo), MPIU_REAL, 0, (mumps)->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&(mumps)->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&(mumps)->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif
1283ab56b82SJunchao Zhang 
/* MumpsScalar: the scalar type used on the MUMPS side.
   Real builds use PetscScalar itself; complex builds use the MUMPS complex type of matching precision. */
#if !defined(PETSC_USE_COMPLEX)
  #define MumpsScalar PetscScalar
#elif defined(PETSC_USE_REAL_SINGLE)
  #define MumpsScalar mumps_complex
#else
  #define MumpsScalar mumps_double_complex
#endif
1393d472b54SHong Zhang 
/* Accessor macros so that indices match the MUMPS documentation: NAME(I) expands to the member name[(I)-1],
   i.e. a 1-based index into the 0-based C array. Intended to be written after a struct access,
   e.g. mumps->id.ICNTL(19). */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]
147397b6df1SKris Buschelman 
typedef struct Mat_MUMPS Mat_MUMPS;
/* State of the MUMPS interface attached to a factor matrix (the routines in this file obtain it as (Mat_MUMPS *)F->data) */
struct Mat_MUMPS {
  /* MUMPS instance structure passed to MUMPS_c(); the variant follows PETSc's scalar kind and precision */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
  #else
  ZMUMPS_STRUC_C id;
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
  #else
  DMUMPS_STRUC_C id;
  #endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;   /* number of entries currently allocated for id.redrhs (see MatMumpsHandleSchur_Private()) */
  PetscScalar   *schur_sol;    /* buffer receiving the Schur solve result when the Schur complement is stored inverted */
  PetscInt       schur_sizesol; /* number of entries currently allocated for schur_sol */
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  /* conversion routine with the signature of the MatConvertToTriples_*() functions in this file, which fill irn, jcn, val and nnz */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};
2003ab56b82SJunchao Zhang 
201a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
202a6053eceSJunchao Zhang    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
203a6053eceSJunchao Zhang  */
204d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
205d71ae5a4SJacob Faibussowitsch {
206a6053eceSJunchao Zhang   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
207f0c56d0fSKris Buschelman 
208a6053eceSJunchao Zhang   PetscFunctionBegin;
209a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES)
210a6053eceSJunchao Zhang   {
211a6053eceSJunchao Zhang     PetscInt i;
212a6053eceSJunchao Zhang     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
2139566063dSJacob Faibussowitsch       PetscCall(PetscFree(mumps->ia_alloc));
2149566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
215a6053eceSJunchao Zhang       mumps->cur_ilen = nrow + 1;
216a6053eceSJunchao Zhang     }
217a6053eceSJunchao Zhang     if (nnz > mumps->cur_jlen) {
2189566063dSJacob Faibussowitsch       PetscCall(PetscFree(mumps->ja_alloc));
2199566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
220a6053eceSJunchao Zhang       mumps->cur_jlen = nnz;
221a6053eceSJunchao Zhang     }
2229566063dSJacob Faibussowitsch     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
2239566063dSJacob Faibussowitsch     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
224a6053eceSJunchao Zhang     *ia_mumps = mumps->ia_alloc;
225a6053eceSJunchao Zhang     *ja_mumps = mumps->ja_alloc;
226a6053eceSJunchao Zhang   }
227a6053eceSJunchao Zhang #else
228a6053eceSJunchao Zhang   *ia_mumps          = ia;
229a6053eceSJunchao Zhang   *ja_mumps          = ja;
230a6053eceSJunchao Zhang #endif
2319566063dSJacob Faibussowitsch   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
2323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
233a6053eceSJunchao Zhang }
234b24902e0SBarry Smith 
235d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
236d71ae5a4SJacob Faibussowitsch {
237b5fa320bSStefano Zampini   PetscFunctionBegin;
2389566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->id.listvar_schur));
2399566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->id.redrhs));
2409566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->schur_sol));
24159ac8732SStefano Zampini   mumps->id.size_schur = 0;
242b3cb21ddSStefano Zampini   mumps->id.schur_lld  = 0;
24359ac8732SStefano Zampini   mumps->id.ICNTL(19)  = 0;
2443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24559ac8732SStefano Zampini }
24659ac8732SStefano Zampini 
247b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */
248d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
249d71ae5a4SJacob Faibussowitsch {
250b3cb21ddSStefano Zampini   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
251b3cb21ddSStefano Zampini   Mat                  S, B, X;
252b3cb21ddSStefano Zampini   MatFactorSchurStatus schurstatus;
253b3cb21ddSStefano Zampini   PetscInt             sizesol;
25459ac8732SStefano Zampini 
25559ac8732SStefano Zampini   PetscFunctionBegin;
2569566063dSJacob Faibussowitsch   PetscCall(MatFactorFactorizeSchurComplement(F));
2579566063dSJacob Faibussowitsch   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
2589566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
2599566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
260a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2619566063dSJacob Faibussowitsch   PetscCall(MatBindToCPU(B, S->boundtocpu));
262a3d589ffSStefano Zampini #endif
263b3cb21ddSStefano Zampini   switch (schurstatus) {
264d71ae5a4SJacob Faibussowitsch   case MAT_FACTOR_SCHUR_FACTORED:
265d71ae5a4SJacob Faibussowitsch     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
266d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
267a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2689566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(X, S->boundtocpu));
269a3d589ffSStefano Zampini #endif
270b3cb21ddSStefano Zampini     if (!mumps->id.ICNTL(9)) { /* transpose solve */
2719566063dSJacob Faibussowitsch       PetscCall(MatMatSolveTranspose(S, B, X));
27259ac8732SStefano Zampini     } else {
2739566063dSJacob Faibussowitsch       PetscCall(MatMatSolve(S, B, X));
27459ac8732SStefano Zampini     }
275b3cb21ddSStefano Zampini     break;
276b3cb21ddSStefano Zampini   case MAT_FACTOR_SCHUR_INVERTED:
277b3cb21ddSStefano Zampini     sizesol = mumps->id.nrhs * mumps->id.size_schur;
27859ac8732SStefano Zampini     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
2799566063dSJacob Faibussowitsch       PetscCall(PetscFree(mumps->schur_sol));
2809566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
28159ac8732SStefano Zampini       mumps->schur_sizesol = sizesol;
282b5fa320bSStefano Zampini     }
2839566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
2849566063dSJacob Faibussowitsch     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
285a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2869566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(X, S->boundtocpu));
287a3d589ffSStefano Zampini #endif
2889566063dSJacob Faibussowitsch     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
28959ac8732SStefano Zampini     if (!mumps->id.ICNTL(9)) { /* transpose solve */
2909566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
291b5fa320bSStefano Zampini     } else {
2929566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
293b5fa320bSStefano Zampini     }
2949566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions(X));
2959566063dSJacob Faibussowitsch     PetscCall(MatProductSymbolic(X));
2969566063dSJacob Faibussowitsch     PetscCall(MatProductNumeric(X));
2974417c5e8SHong Zhang 
2989566063dSJacob Faibussowitsch     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
299b3cb21ddSStefano Zampini     break;
300d71ae5a4SJacob Faibussowitsch   default:
301d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
30259ac8732SStefano Zampini   }
3039566063dSJacob Faibussowitsch   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
3049566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
3059566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&X));
3063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
307b5fa320bSStefano Zampini }
308b5fa320bSStefano Zampini 
309d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
310d71ae5a4SJacob Faibussowitsch {
311b3cb21ddSStefano Zampini   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
312b5fa320bSStefano Zampini 
313b5fa320bSStefano Zampini   PetscFunctionBegin;
314b5fa320bSStefano Zampini   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
3153ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
316b5fa320bSStefano Zampini   }
317b8f61ee1SStefano Zampini   if (!expansion) { /* prepare for the condensation step */
318b5fa320bSStefano Zampini     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
319b5fa320bSStefano Zampini     /* allocate MUMPS internal array to store reduced right-hand sides */
320b5fa320bSStefano Zampini     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
3219566063dSJacob Faibussowitsch       PetscCall(PetscFree(mumps->id.redrhs));
322b5fa320bSStefano Zampini       mumps->id.lredrhs = mumps->id.size_schur;
3239566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
324b5fa320bSStefano Zampini       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
325b5fa320bSStefano Zampini     }
326b5fa320bSStefano Zampini     mumps->id.ICNTL(26) = 1; /* condensation phase */
327b5fa320bSStefano Zampini   } else {                   /* prepare for the expansion step */
328b8f61ee1SStefano Zampini     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
3299566063dSJacob Faibussowitsch     PetscCall(MatMumpsSolveSchur_Private(F));
330b5fa320bSStefano Zampini     mumps->id.ICNTL(26) = 2; /* expansion phase */
3313ab56b82SJunchao Zhang     PetscMUMPS_c(mumps);
33208401ef6SPierre Jolivet     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
333b5fa320bSStefano Zampini     /* restore defaults */
334b5fa320bSStefano Zampini     mumps->id.ICNTL(26) = -1;
335d3d598ffSStefano Zampini     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
336d3d598ffSStefano Zampini     if (mumps->id.nrhs > 1) {
3379566063dSJacob Faibussowitsch       PetscCall(PetscFree(mumps->id.redrhs));
338d3d598ffSStefano Zampini       mumps->id.lredrhs = 0;
339d3d598ffSStefano Zampini       mumps->sizeredrhs = 0;
340d3d598ffSStefano Zampini     }
341b5fa320bSStefano Zampini   }
3423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
343b5fa320bSStefano Zampini }
344b5fa320bSStefano Zampini 
/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij, sbaij or sell format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output (stored in the Mat_MUMPS argument):
    nnz     - dim of r, c, and v (number of local nonzero entries of A); stored in mumps->nnz
    r, c, v - row and col index, matrix values (matrix triples); stored in mumps->irn, mumps->jcn and mumps->val

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then, in MatDestroy_MUMPS(), they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

 */
36216ebf90aSShri Abhyankar 
363d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
364d71ae5a4SJacob Faibussowitsch {
365a3d589ffSStefano Zampini   const PetscScalar *av;
366185f6596SHong Zhang   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
367a6053eceSJunchao Zhang   PetscInt64         nz, rnz, i, j, k;
368a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
36916ebf90aSShri Abhyankar   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
370397b6df1SKris Buschelman 
371397b6df1SKris Buschelman   PetscFunctionBegin;
3729566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(A, &av));
373a6053eceSJunchao Zhang   mumps->val = (PetscScalar *)av;
374bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
3752205254eSKarl Rupp     nz = aa->nz;
3762205254eSKarl Rupp     ai = aa->i;
3772205254eSKarl Rupp     aj = aa->j;
3789566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
379a6053eceSJunchao Zhang     for (i = k = 0; i < M; i++) {
38016ebf90aSShri Abhyankar       rnz = ai[i + 1] - ai[i];
38167877ebaSShri Abhyankar       ajj = aj + ai[i];
38267877ebaSShri Abhyankar       for (j = 0; j < rnz; j++) {
3839566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
3849566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
385a6053eceSJunchao Zhang         k++;
38616ebf90aSShri Abhyankar       }
38716ebf90aSShri Abhyankar     }
388a6053eceSJunchao Zhang     mumps->irn = row;
389a6053eceSJunchao Zhang     mumps->jcn = col;
390a6053eceSJunchao Zhang     mumps->nnz = nz;
39116ebf90aSShri Abhyankar   }
3929566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
3933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39416ebf90aSShri Abhyankar }
395397b6df1SKris Buschelman 
396d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
397d71ae5a4SJacob Faibussowitsch {
398a6053eceSJunchao Zhang   PetscInt64     nz, i, j, k, r;
3997ee00b23SStefano Zampini   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
400a6053eceSJunchao Zhang   PetscMUMPSInt *row, *col;
4017ee00b23SStefano Zampini 
4027ee00b23SStefano Zampini   PetscFunctionBegin;
403a6053eceSJunchao Zhang   mumps->val = a->val;
4047ee00b23SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
4057ee00b23SStefano Zampini     nz = a->sliidx[a->totalslices];
4069566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
407a6053eceSJunchao Zhang     for (i = k = 0; i < a->totalslices; i++) {
40848a46eb9SPierre Jolivet       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
4097ee00b23SStefano Zampini     }
4109566063dSJacob Faibussowitsch     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
411a6053eceSJunchao Zhang     mumps->irn = row;
412a6053eceSJunchao Zhang     mumps->jcn = col;
413a6053eceSJunchao Zhang     mumps->nnz = nz;
4147ee00b23SStefano Zampini   }
4153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4167ee00b23SStefano Zampini }
4177ee00b23SStefano Zampini 
418d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
419d71ae5a4SJacob Faibussowitsch {
42067877ebaSShri Abhyankar   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
42133d57670SJed Brown   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
422a6053eceSJunchao Zhang   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
423a6053eceSJunchao Zhang   PetscInt        bs;
424a6053eceSJunchao Zhang   PetscMUMPSInt  *row, *col;
42567877ebaSShri Abhyankar 
42667877ebaSShri Abhyankar   PetscFunctionBegin;
4279566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &bs));
42833d57670SJed Brown   M          = A->rmap->N / bs;
429a6053eceSJunchao Zhang   mumps->val = aa->a;
430bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
4319371c9d4SSatish Balay     ai = aa->i;
4329371c9d4SSatish Balay     aj = aa->j;
43367877ebaSShri Abhyankar     nz = bs2 * aa->nz;
4349566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
43567877ebaSShri Abhyankar     for (i = 0; i < M; i++) {
43667877ebaSShri Abhyankar       ajj = aj + ai[i];
43767877ebaSShri Abhyankar       rnz = ai[i + 1] - ai[i];
43867877ebaSShri Abhyankar       for (k = 0; k < rnz; k++) {
43967877ebaSShri Abhyankar         for (j = 0; j < bs; j++) {
44067877ebaSShri Abhyankar           for (m = 0; m < bs; m++) {
4419566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
4429566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
443a6053eceSJunchao Zhang             idx++;
44467877ebaSShri Abhyankar           }
44567877ebaSShri Abhyankar         }
44667877ebaSShri Abhyankar       }
44767877ebaSShri Abhyankar     }
448a6053eceSJunchao Zhang     mumps->irn = row;
449a6053eceSJunchao Zhang     mumps->jcn = col;
450a6053eceSJunchao Zhang     mumps->nnz = nz;
45167877ebaSShri Abhyankar   }
4523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
45367877ebaSShri Abhyankar }
45467877ebaSShri Abhyankar 
455d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
456d71ae5a4SJacob Faibussowitsch {
45775480915SPierre Jolivet   const PetscInt *ai, *aj, *ajj;
458a6053eceSJunchao Zhang   PetscInt        bs;
459a6053eceSJunchao Zhang   PetscInt64      nz, rnz, i, j, k, m;
460a6053eceSJunchao Zhang   PetscMUMPSInt  *row, *col;
46175480915SPierre Jolivet   PetscScalar    *val;
46216ebf90aSShri Abhyankar   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
46375480915SPierre Jolivet   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
46438548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
465b94d7dedSBarry Smith   PetscBool isset, hermitian;
46638548759SBarry Smith #endif
46716ebf90aSShri Abhyankar 
46816ebf90aSShri Abhyankar   PetscFunctionBegin;
46938548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
470b94d7dedSBarry Smith   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
471b94d7dedSBarry Smith   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
47238548759SBarry Smith #endif
4732205254eSKarl Rupp   ai = aa->i;
4742205254eSKarl Rupp   aj = aa->j;
4759566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &bs));
47675480915SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) {
477f3fa974cSJacob Faibussowitsch     const PetscInt64 alloc_size = aa->nz * bs2;
478f3fa974cSJacob Faibussowitsch 
479f3fa974cSJacob Faibussowitsch     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
480a6053eceSJunchao Zhang     if (bs > 1) {
481f3fa974cSJacob Faibussowitsch       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
482a6053eceSJunchao Zhang       mumps->val = mumps->val_alloc;
48375480915SPierre Jolivet     } else {
484a6053eceSJunchao Zhang       mumps->val = aa->a;
48575480915SPierre Jolivet     }
486a6053eceSJunchao Zhang     mumps->irn = row;
487a6053eceSJunchao Zhang     mumps->jcn = col;
488a6053eceSJunchao Zhang   } else {
489a6053eceSJunchao Zhang     if (bs == 1) mumps->val = aa->a;
490a6053eceSJunchao Zhang     row = mumps->irn;
491a6053eceSJunchao Zhang     col = mumps->jcn;
492a6053eceSJunchao Zhang   }
493a6053eceSJunchao Zhang   val = mumps->val;
494185f6596SHong Zhang 
49516ebf90aSShri Abhyankar   nz = 0;
496a81fe166SPierre Jolivet   if (bs > 1) {
49775480915SPierre Jolivet     for (i = 0; i < mbs; i++) {
49816ebf90aSShri Abhyankar       rnz = ai[i + 1] - ai[i];
49967877ebaSShri Abhyankar       ajj = aj + ai[i];
50075480915SPierre Jolivet       for (j = 0; j < rnz; j++) {
50175480915SPierre Jolivet         for (k = 0; k < bs; k++) {
50275480915SPierre Jolivet           for (m = 0; m < bs; m++) {
503ec4f40fdSPierre Jolivet             if (ajj[j] > i || k >= m) {
50475480915SPierre Jolivet               if (reuse == MAT_INITIAL_MATRIX) {
5059566063dSJacob Faibussowitsch                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
5069566063dSJacob Faibussowitsch                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
50775480915SPierre Jolivet               }
50875480915SPierre Jolivet               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
50975480915SPierre Jolivet             }
51075480915SPierre Jolivet           }
51175480915SPierre Jolivet         }
51275480915SPierre Jolivet       }
51375480915SPierre Jolivet     }
514a81fe166SPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) {
515a81fe166SPierre Jolivet     for (i = 0; i < mbs; i++) {
516a81fe166SPierre Jolivet       rnz = ai[i + 1] - ai[i];
517a81fe166SPierre Jolivet       ajj = aj + ai[i];
518a81fe166SPierre Jolivet       for (j = 0; j < rnz; j++) {
5199566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
5209566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
521a6053eceSJunchao Zhang         nz++;
522a81fe166SPierre Jolivet       }
523a81fe166SPierre Jolivet     }
52408401ef6SPierre Jolivet     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
52575480915SPierre Jolivet   }
526a6053eceSJunchao Zhang   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
5273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
52816ebf90aSShri Abhyankar }
52916ebf90aSShri Abhyankar 
530d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
531d71ae5a4SJacob Faibussowitsch {
53267877ebaSShri Abhyankar   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
533a6053eceSJunchao Zhang   PetscInt64         nz, rnz, i, j;
53467877ebaSShri Abhyankar   const PetscScalar *av, *v1;
53516ebf90aSShri Abhyankar   PetscScalar       *val;
536a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
537829b1710SHong Zhang   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
53829b521d4Sstefano_zampini   PetscBool          missing;
53938548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
540b94d7dedSBarry Smith   PetscBool hermitian, isset;
54138548759SBarry Smith #endif
54216ebf90aSShri Abhyankar 
54316ebf90aSShri Abhyankar   PetscFunctionBegin;
54438548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
545b94d7dedSBarry Smith   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
546b94d7dedSBarry Smith   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
54738548759SBarry Smith #endif
5489566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(A, &av));
5499371c9d4SSatish Balay   ai    = aa->i;
5509371c9d4SSatish Balay   aj    = aa->j;
55116ebf90aSShri Abhyankar   adiag = aa->diag;
5529566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
553bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
5547ee00b23SStefano Zampini     /* count nz in the upper triangular part of A */
555829b1710SHong Zhang     nz = 0;
55629b521d4Sstefano_zampini     if (missing) {
55729b521d4Sstefano_zampini       for (i = 0; i < M; i++) {
55829b521d4Sstefano_zampini         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
55929b521d4Sstefano_zampini           for (j = ai[i]; j < ai[i + 1]; j++) {
56029b521d4Sstefano_zampini             if (aj[j] < i) continue;
56129b521d4Sstefano_zampini             nz++;
56229b521d4Sstefano_zampini           }
56329b521d4Sstefano_zampini         } else {
56429b521d4Sstefano_zampini           nz += ai[i + 1] - adiag[i];
56529b521d4Sstefano_zampini         }
56629b521d4Sstefano_zampini       }
56729b521d4Sstefano_zampini     } else {
568829b1710SHong Zhang       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
56929b521d4Sstefano_zampini     }
5709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
5719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
572a6053eceSJunchao Zhang     mumps->nnz = nz;
573a6053eceSJunchao Zhang     mumps->irn = row;
574a6053eceSJunchao Zhang     mumps->jcn = col;
575a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
576185f6596SHong Zhang 
57716ebf90aSShri Abhyankar     nz = 0;
57829b521d4Sstefano_zampini     if (missing) {
57929b521d4Sstefano_zampini       for (i = 0; i < M; i++) {
58029b521d4Sstefano_zampini         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
58129b521d4Sstefano_zampini           for (j = ai[i]; j < ai[i + 1]; j++) {
58229b521d4Sstefano_zampini             if (aj[j] < i) continue;
5839566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
5849566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
58529b521d4Sstefano_zampini             val[nz] = av[j];
58629b521d4Sstefano_zampini             nz++;
58729b521d4Sstefano_zampini           }
58829b521d4Sstefano_zampini         } else {
58929b521d4Sstefano_zampini           rnz = ai[i + 1] - adiag[i];
59029b521d4Sstefano_zampini           ajj = aj + adiag[i];
59129b521d4Sstefano_zampini           v1  = av + adiag[i];
59229b521d4Sstefano_zampini           for (j = 0; j < rnz; j++) {
5939566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
5949566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
595a6053eceSJunchao Zhang             val[nz++] = v1[j];
59629b521d4Sstefano_zampini           }
59729b521d4Sstefano_zampini         }
59829b521d4Sstefano_zampini       }
59929b521d4Sstefano_zampini     } else {
60016ebf90aSShri Abhyankar       for (i = 0; i < M; i++) {
60116ebf90aSShri Abhyankar         rnz = ai[i + 1] - adiag[i];
60267877ebaSShri Abhyankar         ajj = aj + adiag[i];
603cf3759fdSShri Abhyankar         v1  = av + adiag[i];
60467877ebaSShri Abhyankar         for (j = 0; j < rnz; j++) {
6059566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
6069566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
607a6053eceSJunchao Zhang           val[nz++] = v1[j];
60816ebf90aSShri Abhyankar         }
60916ebf90aSShri Abhyankar       }
61029b521d4Sstefano_zampini     }
611397b6df1SKris Buschelman   } else {
612a6053eceSJunchao Zhang     nz  = 0;
613a6053eceSJunchao Zhang     val = mumps->val;
61429b521d4Sstefano_zampini     if (missing) {
61516ebf90aSShri Abhyankar       for (i = 0; i < M; i++) {
61629b521d4Sstefano_zampini         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
61729b521d4Sstefano_zampini           for (j = ai[i]; j < ai[i + 1]; j++) {
61829b521d4Sstefano_zampini             if (aj[j] < i) continue;
61929b521d4Sstefano_zampini             val[nz++] = av[j];
62029b521d4Sstefano_zampini           }
62129b521d4Sstefano_zampini         } else {
62216ebf90aSShri Abhyankar           rnz = ai[i + 1] - adiag[i];
62367877ebaSShri Abhyankar           v1  = av + adiag[i];
624ad540459SPierre Jolivet           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
62516ebf90aSShri Abhyankar         }
62616ebf90aSShri Abhyankar       }
62729b521d4Sstefano_zampini     } else {
62816ebf90aSShri Abhyankar       for (i = 0; i < M; i++) {
62916ebf90aSShri Abhyankar         rnz = ai[i + 1] - adiag[i];
63016ebf90aSShri Abhyankar         v1  = av + adiag[i];
631ad540459SPierre Jolivet         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
63216ebf90aSShri Abhyankar       }
63316ebf90aSShri Abhyankar     }
63429b521d4Sstefano_zampini   }
6359566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
6363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
63716ebf90aSShri Abhyankar }
63816ebf90aSShri Abhyankar 
639d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
640d71ae5a4SJacob Faibussowitsch {
641a6053eceSJunchao Zhang   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
642a6053eceSJunchao Zhang   PetscInt           bs;
643a6053eceSJunchao Zhang   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
644a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
64516ebf90aSShri Abhyankar   const PetscScalar *av, *bv, *v1, *v2;
64616ebf90aSShri Abhyankar   PetscScalar       *val;
647397b6df1SKris Buschelman   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
648397b6df1SKris Buschelman   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
649397b6df1SKris Buschelman   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
650ec4f40fdSPierre Jolivet   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
65138548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
652b94d7dedSBarry Smith   PetscBool hermitian, isset;
65338548759SBarry Smith #endif
65416ebf90aSShri Abhyankar 
65516ebf90aSShri Abhyankar   PetscFunctionBegin;
65638548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
657b94d7dedSBarry Smith   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
658b94d7dedSBarry Smith   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
65938548759SBarry Smith #endif
6609566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &bs));
66138548759SBarry Smith   rstart = A->rmap->rstart;
66238548759SBarry Smith   ai     = aa->i;
66338548759SBarry Smith   aj     = aa->j;
66438548759SBarry Smith   bi     = bb->i;
66538548759SBarry Smith   bj     = bb->j;
66638548759SBarry Smith   av     = aa->a;
66738548759SBarry Smith   bv     = bb->a;
668397b6df1SKris Buschelman 
6692205254eSKarl Rupp   garray = mat->garray;
6702205254eSKarl Rupp 
671bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
672a6053eceSJunchao Zhang     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
6739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
6749566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
675a6053eceSJunchao Zhang     /* can not decide the exact mumps->nnz now because of the SBAIJ */
676a6053eceSJunchao Zhang     mumps->irn = row;
677a6053eceSJunchao Zhang     mumps->jcn = col;
678a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
679397b6df1SKris Buschelman   } else {
680a6053eceSJunchao Zhang     val = mumps->val;
681397b6df1SKris Buschelman   }
682397b6df1SKris Buschelman 
6839371c9d4SSatish Balay   jj   = 0;
6849371c9d4SSatish Balay   irow = rstart;
685ec4f40fdSPierre Jolivet   for (i = 0; i < mbs; i++) {
686397b6df1SKris Buschelman     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
687397b6df1SKris Buschelman     countA = ai[i + 1] - ai[i];
688397b6df1SKris Buschelman     countB = bi[i + 1] - bi[i];
689397b6df1SKris Buschelman     bjj    = bj + bi[i];
690ec4f40fdSPierre Jolivet     v1     = av + ai[i] * bs2;
691ec4f40fdSPierre Jolivet     v2     = bv + bi[i] * bs2;
692397b6df1SKris Buschelman 
693ec4f40fdSPierre Jolivet     if (bs > 1) {
694ec4f40fdSPierre Jolivet       /* A-part */
695ec4f40fdSPierre Jolivet       for (j = 0; j < countA; j++) {
696ec4f40fdSPierre Jolivet         for (k = 0; k < bs; k++) {
697ec4f40fdSPierre Jolivet           for (m = 0; m < bs; m++) {
698ec4f40fdSPierre Jolivet             if (rstart + ajj[j] * bs > irow || k >= m) {
699ec4f40fdSPierre Jolivet               if (reuse == MAT_INITIAL_MATRIX) {
7009566063dSJacob Faibussowitsch                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
7019566063dSJacob Faibussowitsch                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
702ec4f40fdSPierre Jolivet               }
703ec4f40fdSPierre Jolivet               val[jj++] = v1[j * bs2 + m + k * bs];
704ec4f40fdSPierre Jolivet             }
705ec4f40fdSPierre Jolivet           }
706ec4f40fdSPierre Jolivet         }
707ec4f40fdSPierre Jolivet       }
708ec4f40fdSPierre Jolivet 
709ec4f40fdSPierre Jolivet       /* B-part */
710ec4f40fdSPierre Jolivet       for (j = 0; j < countB; j++) {
711ec4f40fdSPierre Jolivet         for (k = 0; k < bs; k++) {
712ec4f40fdSPierre Jolivet           for (m = 0; m < bs; m++) {
713ec4f40fdSPierre Jolivet             if (reuse == MAT_INITIAL_MATRIX) {
7149566063dSJacob Faibussowitsch               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
7159566063dSJacob Faibussowitsch               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
716ec4f40fdSPierre Jolivet             }
717ec4f40fdSPierre Jolivet             val[jj++] = v2[j * bs2 + m + k * bs];
718ec4f40fdSPierre Jolivet           }
719ec4f40fdSPierre Jolivet         }
720ec4f40fdSPierre Jolivet       }
721ec4f40fdSPierre Jolivet     } else {
722397b6df1SKris Buschelman       /* A-part */
723397b6df1SKris Buschelman       for (j = 0; j < countA; j++) {
724bccb9932SShri Abhyankar         if (reuse == MAT_INITIAL_MATRIX) {
7259566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
7269566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
727397b6df1SKris Buschelman         }
72816ebf90aSShri Abhyankar         val[jj++] = v1[j];
729397b6df1SKris Buschelman       }
73016ebf90aSShri Abhyankar 
73116ebf90aSShri Abhyankar       /* B-part */
73216ebf90aSShri Abhyankar       for (j = 0; j < countB; j++) {
733bccb9932SShri Abhyankar         if (reuse == MAT_INITIAL_MATRIX) {
7349566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
7359566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
736397b6df1SKris Buschelman         }
73716ebf90aSShri Abhyankar         val[jj++] = v2[j];
73816ebf90aSShri Abhyankar       }
73916ebf90aSShri Abhyankar     }
740ec4f40fdSPierre Jolivet     irow += bs;
741ec4f40fdSPierre Jolivet   }
742a6053eceSJunchao Zhang   mumps->nnz = jj;
7433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
74416ebf90aSShri Abhyankar }
74516ebf90aSShri Abhyankar 
746d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
747d71ae5a4SJacob Faibussowitsch {
74816ebf90aSShri Abhyankar   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
749a6053eceSJunchao Zhang   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
750a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
75116ebf90aSShri Abhyankar   const PetscScalar *av, *bv, *v1, *v2;
75216ebf90aSShri Abhyankar   PetscScalar       *val;
753a3d589ffSStefano Zampini   Mat                Ad, Ao;
754a3d589ffSStefano Zampini   Mat_SeqAIJ        *aa;
755a3d589ffSStefano Zampini   Mat_SeqAIJ        *bb;
75616ebf90aSShri Abhyankar 
75716ebf90aSShri Abhyankar   PetscFunctionBegin;
7589566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
7599566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
7609566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
761a3d589ffSStefano Zampini 
762a3d589ffSStefano Zampini   aa = (Mat_SeqAIJ *)(Ad)->data;
763a3d589ffSStefano Zampini   bb = (Mat_SeqAIJ *)(Ao)->data;
76438548759SBarry Smith   ai = aa->i;
76538548759SBarry Smith   aj = aa->j;
76638548759SBarry Smith   bi = bb->i;
76738548759SBarry Smith   bj = bb->j;
76816ebf90aSShri Abhyankar 
769a3d589ffSStefano Zampini   rstart = A->rmap->rstart;
7702205254eSKarl Rupp 
771bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
772a6053eceSJunchao Zhang     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
7739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
7749566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
775a6053eceSJunchao Zhang     mumps->nnz = nz;
776a6053eceSJunchao Zhang     mumps->irn = row;
777a6053eceSJunchao Zhang     mumps->jcn = col;
778a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
77916ebf90aSShri Abhyankar   } else {
780a6053eceSJunchao Zhang     val = mumps->val;
78116ebf90aSShri Abhyankar   }
78216ebf90aSShri Abhyankar 
7839371c9d4SSatish Balay   jj   = 0;
7849371c9d4SSatish Balay   irow = rstart;
78516ebf90aSShri Abhyankar   for (i = 0; i < m; i++) {
78616ebf90aSShri Abhyankar     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
78716ebf90aSShri Abhyankar     countA = ai[i + 1] - ai[i];
78816ebf90aSShri Abhyankar     countB = bi[i + 1] - bi[i];
78916ebf90aSShri Abhyankar     bjj    = bj + bi[i];
79016ebf90aSShri Abhyankar     v1     = av + ai[i];
79116ebf90aSShri Abhyankar     v2     = bv + bi[i];
79216ebf90aSShri Abhyankar 
79316ebf90aSShri Abhyankar     /* A-part */
79416ebf90aSShri Abhyankar     for (j = 0; j < countA; j++) {
795bccb9932SShri Abhyankar       if (reuse == MAT_INITIAL_MATRIX) {
7969566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
7979566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
79816ebf90aSShri Abhyankar       }
79916ebf90aSShri Abhyankar       val[jj++] = v1[j];
80016ebf90aSShri Abhyankar     }
80116ebf90aSShri Abhyankar 
80216ebf90aSShri Abhyankar     /* B-part */
80316ebf90aSShri Abhyankar     for (j = 0; j < countB; j++) {
804bccb9932SShri Abhyankar       if (reuse == MAT_INITIAL_MATRIX) {
8059566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
8069566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
80716ebf90aSShri Abhyankar       }
80816ebf90aSShri Abhyankar       val[jj++] = v2[j];
80916ebf90aSShri Abhyankar     }
81016ebf90aSShri Abhyankar     irow++;
81116ebf90aSShri Abhyankar   }
8129566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
8139566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
8143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81516ebf90aSShri Abhyankar }
81616ebf90aSShri Abhyankar 
817d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
818d71ae5a4SJacob Faibussowitsch {
81967877ebaSShri Abhyankar   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
82067877ebaSShri Abhyankar   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
82167877ebaSShri Abhyankar   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
82267877ebaSShri Abhyankar   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
823d985c460SShri Abhyankar   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
82433d57670SJed Brown   const PetscInt     bs2 = mat->bs2;
825a6053eceSJunchao Zhang   PetscInt           bs;
826a6053eceSJunchao Zhang   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
827a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
82867877ebaSShri Abhyankar   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
82967877ebaSShri Abhyankar   PetscScalar       *val;
83067877ebaSShri Abhyankar 
83167877ebaSShri Abhyankar   PetscFunctionBegin;
8329566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &bs));
833bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
83467877ebaSShri Abhyankar     nz = bs2 * (aa->nz + bb->nz);
8359566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
8369566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
837a6053eceSJunchao Zhang     mumps->nnz = nz;
838a6053eceSJunchao Zhang     mumps->irn = row;
839a6053eceSJunchao Zhang     mumps->jcn = col;
840a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
84167877ebaSShri Abhyankar   } else {
842a6053eceSJunchao Zhang     val = mumps->val;
84367877ebaSShri Abhyankar   }
84467877ebaSShri Abhyankar 
8459371c9d4SSatish Balay   jj   = 0;
8469371c9d4SSatish Balay   irow = rstart;
84767877ebaSShri Abhyankar   for (i = 0; i < mbs; i++) {
84867877ebaSShri Abhyankar     countA = ai[i + 1] - ai[i];
84967877ebaSShri Abhyankar     countB = bi[i + 1] - bi[i];
85067877ebaSShri Abhyankar     ajj    = aj + ai[i];
85167877ebaSShri Abhyankar     bjj    = bj + bi[i];
85267877ebaSShri Abhyankar     v1     = av + bs2 * ai[i];
85367877ebaSShri Abhyankar     v2     = bv + bs2 * bi[i];
85467877ebaSShri Abhyankar 
85567877ebaSShri Abhyankar     idx = 0;
85667877ebaSShri Abhyankar     /* A-part */
85767877ebaSShri Abhyankar     for (k = 0; k < countA; k++) {
85867877ebaSShri Abhyankar       for (j = 0; j < bs; j++) {
85967877ebaSShri Abhyankar         for (n = 0; n < bs; n++) {
860bccb9932SShri Abhyankar           if (reuse == MAT_INITIAL_MATRIX) {
8619566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
8629566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
86367877ebaSShri Abhyankar           }
86467877ebaSShri Abhyankar           val[jj++] = v1[idx++];
86567877ebaSShri Abhyankar         }
86667877ebaSShri Abhyankar       }
86767877ebaSShri Abhyankar     }
86867877ebaSShri Abhyankar 
86967877ebaSShri Abhyankar     idx = 0;
87067877ebaSShri Abhyankar     /* B-part */
87167877ebaSShri Abhyankar     for (k = 0; k < countB; k++) {
87267877ebaSShri Abhyankar       for (j = 0; j < bs; j++) {
87367877ebaSShri Abhyankar         for (n = 0; n < bs; n++) {
874bccb9932SShri Abhyankar           if (reuse == MAT_INITIAL_MATRIX) {
8759566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
8769566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
87767877ebaSShri Abhyankar           }
878d985c460SShri Abhyankar           val[jj++] = v2[idx++];
87967877ebaSShri Abhyankar         }
88067877ebaSShri Abhyankar       }
88167877ebaSShri Abhyankar     }
882d985c460SShri Abhyankar     irow += bs;
88367877ebaSShri Abhyankar   }
8843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
88567877ebaSShri Abhyankar }
88667877ebaSShri Abhyankar 
887d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
888d71ae5a4SJacob Faibussowitsch {
88916ebf90aSShri Abhyankar   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
890a6053eceSJunchao Zhang   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
891a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
89216ebf90aSShri Abhyankar   const PetscScalar *av, *bv, *v1, *v2;
89316ebf90aSShri Abhyankar   PetscScalar       *val;
894a3d589ffSStefano Zampini   Mat                Ad, Ao;
895a3d589ffSStefano Zampini   Mat_SeqAIJ        *aa;
896a3d589ffSStefano Zampini   Mat_SeqAIJ        *bb;
89738548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
898b94d7dedSBarry Smith   PetscBool hermitian, isset;
89938548759SBarry Smith #endif
90016ebf90aSShri Abhyankar 
90116ebf90aSShri Abhyankar   PetscFunctionBegin;
90238548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
903b94d7dedSBarry Smith   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
904b94d7dedSBarry Smith   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
90538548759SBarry Smith #endif
9069566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
9079566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
9089566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
909a3d589ffSStefano Zampini 
910a3d589ffSStefano Zampini   aa    = (Mat_SeqAIJ *)(Ad)->data;
911a3d589ffSStefano Zampini   bb    = (Mat_SeqAIJ *)(Ao)->data;
91238548759SBarry Smith   ai    = aa->i;
91338548759SBarry Smith   aj    = aa->j;
91438548759SBarry Smith   adiag = aa->diag;
91538548759SBarry Smith   bi    = bb->i;
91638548759SBarry Smith   bj    = bb->j;
9172205254eSKarl Rupp 
91816ebf90aSShri Abhyankar   rstart = A->rmap->rstart;
91916ebf90aSShri Abhyankar 
920bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
921e0bace9bSHong Zhang     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
922e0bace9bSHong Zhang     nzb = 0; /* num of upper triangular entries in mat->B */
92316ebf90aSShri Abhyankar     for (i = 0; i < m; i++) {
924e0bace9bSHong Zhang       nza += (ai[i + 1] - adiag[i]);
92516ebf90aSShri Abhyankar       countB = bi[i + 1] - bi[i];
92616ebf90aSShri Abhyankar       bjj    = bj + bi[i];
927e0bace9bSHong Zhang       for (j = 0; j < countB; j++) {
928e0bace9bSHong Zhang         if (garray[bjj[j]] > rstart) nzb++;
929e0bace9bSHong Zhang       }
930e0bace9bSHong Zhang     }
93116ebf90aSShri Abhyankar 
932e0bace9bSHong Zhang     nz = nza + nzb; /* total nz of upper triangular part of mat */
9339566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
9349566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
935a6053eceSJunchao Zhang     mumps->nnz = nz;
936a6053eceSJunchao Zhang     mumps->irn = row;
937a6053eceSJunchao Zhang     mumps->jcn = col;
938a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
93916ebf90aSShri Abhyankar   } else {
940a6053eceSJunchao Zhang     val = mumps->val;
94116ebf90aSShri Abhyankar   }
94216ebf90aSShri Abhyankar 
9439371c9d4SSatish Balay   jj   = 0;
9449371c9d4SSatish Balay   irow = rstart;
94516ebf90aSShri Abhyankar   for (i = 0; i < m; i++) {
94616ebf90aSShri Abhyankar     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
94716ebf90aSShri Abhyankar     v1     = av + adiag[i];
94816ebf90aSShri Abhyankar     countA = ai[i + 1] - adiag[i];
94916ebf90aSShri Abhyankar     countB = bi[i + 1] - bi[i];
95016ebf90aSShri Abhyankar     bjj    = bj + bi[i];
95116ebf90aSShri Abhyankar     v2     = bv + bi[i];
95216ebf90aSShri Abhyankar 
95316ebf90aSShri Abhyankar     /* A-part */
95416ebf90aSShri Abhyankar     for (j = 0; j < countA; j++) {
955bccb9932SShri Abhyankar       if (reuse == MAT_INITIAL_MATRIX) {
9569566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
9579566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
95816ebf90aSShri Abhyankar       }
95916ebf90aSShri Abhyankar       val[jj++] = v1[j];
96016ebf90aSShri Abhyankar     }
96116ebf90aSShri Abhyankar 
96216ebf90aSShri Abhyankar     /* B-part */
96316ebf90aSShri Abhyankar     for (j = 0; j < countB; j++) {
96416ebf90aSShri Abhyankar       if (garray[bjj[j]] > rstart) {
965bccb9932SShri Abhyankar         if (reuse == MAT_INITIAL_MATRIX) {
9669566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
9679566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
96816ebf90aSShri Abhyankar         }
96916ebf90aSShri Abhyankar         val[jj++] = v2[j];
97016ebf90aSShri Abhyankar       }
971397b6df1SKris Buschelman     }
972397b6df1SKris Buschelman     irow++;
973397b6df1SKris Buschelman   }
9749566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
9759566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
9763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
977397b6df1SKris Buschelman }
978397b6df1SKris Buschelman 
979d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MUMPS(Mat A)
980d71ae5a4SJacob Faibussowitsch {
981a6053eceSJunchao Zhang   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
982b24902e0SBarry Smith 
983397b6df1SKris Buschelman   PetscFunctionBegin;
9849566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
9859566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
9869566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&mumps->scat_sol));
9879566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mumps->b_seq));
9889566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mumps->x_seq));
9899566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->id.perm_in));
9909566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
9919566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->val_alloc));
9929566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->info));
993413bcc21SPierre Jolivet   PetscCall(PetscFree(mumps->ICNTL_pre));
994413bcc21SPierre Jolivet   PetscCall(PetscFree(mumps->CNTL_pre));
9959566063dSJacob Faibussowitsch   PetscCall(MatMumpsResetSchur_Private(mumps));
996413bcc21SPierre Jolivet   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
997a5e57a09SHong Zhang     mumps->id.job = JOB_END;
9983ab56b82SJunchao Zhang     PetscMUMPS_c(mumps);
99908401ef6SPierre Jolivet     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1000413bcc21SPierre Jolivet     if (mumps->mumps_comm != MPI_COMM_NULL) {
1001413bcc21SPierre Jolivet       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1002413bcc21SPierre Jolivet       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1003413bcc21SPierre Jolivet     }
1004413bcc21SPierre Jolivet   }
10053ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT)
100667602552SJunchao Zhang   if (mumps->use_petsc_omp_support) {
10079566063dSJacob Faibussowitsch     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
10089566063dSJacob Faibussowitsch     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
10099566063dSJacob Faibussowitsch     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
101067602552SJunchao Zhang   }
10113ab56b82SJunchao Zhang #endif
10129566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->ia_alloc));
10139566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->ja_alloc));
10149566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->recvcount));
10159566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->reqs));
10169566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->irhs_loc));
10179566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1018bf0cc555SLisandro Dalcin 
101997969023SHong Zhang   /* clear composed functions */
10209566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
10219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
10229566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
10239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
10249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
10259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
10269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
10279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
10289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
10299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
10309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
10315c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
10329566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
10339566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
10343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1035397b6df1SKris Buschelman }
1036397b6df1SKris Buschelman 
103767602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1038d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1039d71ae5a4SJacob Faibussowitsch {
104067602552SJunchao Zhang   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
104167602552SJunchao Zhang   const PetscMPIInt ompsize = mumps->omp_comm_size;
104267602552SJunchao Zhang   PetscInt          i, m, M, rstart;
104367602552SJunchao Zhang 
104467602552SJunchao Zhang   PetscFunctionBegin;
10459566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &M, NULL));
10469566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &m, NULL));
104708401ef6SPierre Jolivet   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
104867602552SJunchao Zhang   if (ompsize == 1) {
104967602552SJunchao Zhang     if (!mumps->irhs_loc) {
105067602552SJunchao Zhang       mumps->nloc_rhs = m;
10519566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
10529566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
105367602552SJunchao Zhang       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
105467602552SJunchao Zhang     }
105567602552SJunchao Zhang     mumps->id.rhs_loc = (MumpsScalar *)array;
105667602552SJunchao Zhang   } else {
105767602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT)
105867602552SJunchao Zhang     const PetscInt *ranges;
105967602552SJunchao Zhang     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
106067602552SJunchao Zhang     MPI_Group       petsc_group, omp_group;
106167602552SJunchao Zhang     PetscScalar    *recvbuf = NULL;
106267602552SJunchao Zhang 
106367602552SJunchao Zhang     if (mumps->is_omp_master) {
106467602552SJunchao Zhang       /* Lazily initialize the omp stuff for distributed rhs */
106567602552SJunchao Zhang       if (!mumps->irhs_loc) {
10669566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
10679566063dSJacob Faibussowitsch         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
10689566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
10699566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
107067602552SJunchao Zhang         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
10719566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
107267602552SJunchao Zhang 
107367602552SJunchao Zhang         /* Populate mumps->irhs_loc[], rhs_nrow[] */
107467602552SJunchao Zhang         mumps->nloc_rhs = 0;
10759566063dSJacob Faibussowitsch         PetscCall(MatGetOwnershipRanges(A, &ranges));
107667602552SJunchao Zhang         for (j = 0; j < ompsize; j++) {
107767602552SJunchao Zhang           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
107867602552SJunchao Zhang           mumps->nloc_rhs += mumps->rhs_nrow[j];
107967602552SJunchao Zhang         }
10809566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
108167602552SJunchao Zhang         for (j = k = 0; j < ompsize; j++) {
108267602552SJunchao Zhang           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
108367602552SJunchao Zhang         }
108467602552SJunchao Zhang 
10859566063dSJacob Faibussowitsch         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
10869566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Group_free(&petsc_group));
10879566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Group_free(&omp_group));
108867602552SJunchao Zhang       }
108967602552SJunchao Zhang 
109067602552SJunchao Zhang       /* Realloc buffers when current nrhs is bigger than what we have met */
109167602552SJunchao Zhang       if (nrhs > mumps->max_nrhs) {
10929566063dSJacob Faibussowitsch         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
10939566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
109467602552SJunchao Zhang         mumps->max_nrhs = nrhs;
109567602552SJunchao Zhang       }
109667602552SJunchao Zhang 
109767602552SJunchao Zhang       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
10989566063dSJacob Faibussowitsch       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
109967602552SJunchao Zhang       mumps->rhs_disps[0] = 0;
110067602552SJunchao Zhang       for (j = 1; j < ompsize; j++) {
110167602552SJunchao Zhang         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
110208401ef6SPierre Jolivet         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
110367602552SJunchao Zhang       }
110467602552SJunchao Zhang       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
110567602552SJunchao Zhang     }
110667602552SJunchao Zhang 
11079566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
11089566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
110967602552SJunchao Zhang 
111067602552SJunchao Zhang     if (mumps->is_omp_master) {
111167602552SJunchao Zhang       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
111267602552SJunchao Zhang         PetscScalar *dst, *dstbase = mumps->rhs_loc;
111367602552SJunchao Zhang         for (j = 0; j < ompsize; j++) {
111467602552SJunchao Zhang           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
111567602552SJunchao Zhang           dst                    = dstbase;
111667602552SJunchao Zhang           for (i = 0; i < nrhs; i++) {
11179566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
111867602552SJunchao Zhang             src += mumps->rhs_nrow[j];
111967602552SJunchao Zhang             dst += mumps->nloc_rhs;
112067602552SJunchao Zhang           }
112167602552SJunchao Zhang           dstbase += mumps->rhs_nrow[j];
112267602552SJunchao Zhang         }
112367602552SJunchao Zhang       }
112467602552SJunchao Zhang       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
112567602552SJunchao Zhang     }
112667602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */
112767602552SJunchao Zhang   }
112867602552SJunchao Zhang   mumps->id.nrhs     = nrhs;
112967602552SJunchao Zhang   mumps->id.nloc_rhs = mumps->nloc_rhs;
113067602552SJunchao Zhang   mumps->id.lrhs_loc = mumps->nloc_rhs;
113167602552SJunchao Zhang   mumps->id.irhs_loc = mumps->irhs_loc;
11323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
113367602552SJunchao Zhang }
113467602552SJunchao Zhang 
1135d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1136d71ae5a4SJacob Faibussowitsch {
1137e69c285eSBarry Smith   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
113825aac85cSJunchao Zhang   const PetscScalar *rarray = NULL;
1139d54de34fSKris Buschelman   PetscScalar       *array;
1140329ec9b3SHong Zhang   IS                 is_iden, is_petsc;
1141329ec9b3SHong Zhang   PetscInt           i;
1142cc86f929SStefano Zampini   PetscBool          second_solve = PETSC_FALSE;
1143883f2eb9SBarry Smith   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1144397b6df1SKris Buschelman 
1145397b6df1SKris Buschelman   PetscFunctionBegin;
11469371c9d4SSatish Balay   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
11479371c9d4SSatish Balay                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
11489371c9d4SSatish Balay                                    &cite1));
11499371c9d4SSatish Balay   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
11509371c9d4SSatish Balay                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
11519371c9d4SSatish Balay                                    &cite2));
11522aca8efcSHong Zhang 
1153603e8f96SBarry Smith   if (A->factorerrortype) {
11549566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
11559566063dSJacob Faibussowitsch     PetscCall(VecSetInf(x));
11563ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
11572aca8efcSHong Zhang   }
11582aca8efcSHong Zhang 
1159a5e57a09SHong Zhang   mumps->id.nrhs = 1;
11602d4298aeSJunchao Zhang   if (mumps->petsc_size > 1) {
116125aac85cSJunchao Zhang     if (mumps->ICNTL20 == 10) {
116267602552SJunchao Zhang       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
11639566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(b, &rarray));
11649566063dSJacob Faibussowitsch       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
116525aac85cSJunchao Zhang     } else {
116641ffd417SStefano Zampini       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
11679566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
11689566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
116967602552SJunchao Zhang       if (!mumps->myid) {
11709566063dSJacob Faibussowitsch         PetscCall(VecGetArray(mumps->b_seq, &array));
117167602552SJunchao Zhang         mumps->id.rhs = (MumpsScalar *)array;
117267602552SJunchao Zhang       }
117325aac85cSJunchao Zhang     }
11743ab56b82SJunchao Zhang   } else {                   /* petsc_size == 1 */
117567602552SJunchao Zhang     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
11769566063dSJacob Faibussowitsch     PetscCall(VecCopy(b, x));
11779566063dSJacob Faibussowitsch     PetscCall(VecGetArray(x, &array));
1178940cd9d6SSatish Balay     mumps->id.rhs = (MumpsScalar *)array;
1179397b6df1SKris Buschelman   }
1180397b6df1SKris Buschelman 
1181cc86f929SStefano Zampini   /*
1182cc86f929SStefano Zampini      handle condensation step of Schur complement (if any)
1183cc86f929SStefano Zampini      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1184cc86f929SStefano Zampini      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1185cc86f929SStefano Zampini      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1186cc86f929SStefano Zampini      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1187cc86f929SStefano Zampini   */
1188583f777eSStefano Zampini   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
118908401ef6SPierre Jolivet     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1190cc86f929SStefano Zampini     second_solve = PETSC_TRUE;
11919566063dSJacob Faibussowitsch     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1192cc86f929SStefano Zampini   }
1193397b6df1SKris Buschelman   /* solve phase */
1194a5e57a09SHong Zhang   mumps->id.job = JOB_SOLVE;
11953ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
119608401ef6SPierre Jolivet   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1197397b6df1SKris Buschelman 
1198b5fa320bSStefano Zampini   /* handle expansion step of Schur complement (if any) */
11991baa6e33SBarry Smith   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1200b5fa320bSStefano Zampini 
12012d4298aeSJunchao Zhang   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1202a5e57a09SHong Zhang     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1203a5e57a09SHong Zhang       /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
12049566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1205397b6df1SKris Buschelman     }
1206a5e57a09SHong Zhang     if (!mumps->scat_sol) { /* create scatter scat_sol */
1207a6053eceSJunchao Zhang       PetscInt *isol2_loc = NULL;
12089566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
12099566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1210a6053eceSJunchao Zhang       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
12119566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
12129566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
12139566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&is_iden));
12149566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&is_petsc));
1215a5e57a09SHong Zhang       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1216397b6df1SKris Buschelman     }
1217a5e57a09SHong Zhang 
12189566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
12199566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1220329ec9b3SHong Zhang   }
1221353d7d71SJunchao Zhang 
122267602552SJunchao Zhang   if (mumps->petsc_size > 1) {
122325aac85cSJunchao Zhang     if (mumps->ICNTL20 == 10) {
12249566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(b, &rarray));
122525aac85cSJunchao Zhang     } else if (!mumps->myid) {
12269566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mumps->b_seq, &array));
122725aac85cSJunchao Zhang     }
12289566063dSJacob Faibussowitsch   } else PetscCall(VecRestoreArray(x, &array));
1229353d7d71SJunchao Zhang 
12309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
12313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1232397b6df1SKris Buschelman }
1233397b6df1SKris Buschelman 
1234d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1235d71ae5a4SJacob Faibussowitsch {
1236e69c285eSBarry Smith   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1237*338d3105SPierre Jolivet   const PetscMUMPSInt value = mumps->id.ICNTL(9);
123851d5961aSHong Zhang 
123951d5961aSHong Zhang   PetscFunctionBegin;
1240a5e57a09SHong Zhang   mumps->id.ICNTL(9) = 0;
12419566063dSJacob Faibussowitsch   PetscCall(MatSolve_MUMPS(A, b, x));
1242*338d3105SPierre Jolivet   mumps->id.ICNTL(9) = value;
12433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
124451d5961aSHong Zhang }
124551d5961aSHong Zhang 
1246d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1247d71ae5a4SJacob Faibussowitsch {
1248b8491c3eSStefano Zampini   Mat                Bt = NULL;
1249a6053eceSJunchao Zhang   PetscBool          denseX, denseB, flg, flgT;
1250e69c285eSBarry Smith   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1251334c5f61SHong Zhang   PetscInt           i, nrhs, M;
12521683a169SBarry Smith   PetscScalar       *array;
12531683a169SBarry Smith   const PetscScalar *rbray;
1254a6053eceSJunchao Zhang   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1255a6053eceSJunchao Zhang   PetscMUMPSInt     *isol_loc, *isol_loc_save;
12561683a169SBarry Smith   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1257be818407SHong Zhang   IS                 is_to, is_from;
1258beae5ec0SHong Zhang   PetscInt           k, proc, j, m, myrstart;
1259be818407SHong Zhang   const PetscInt    *rstart;
126067602552SJunchao Zhang   Vec                v_mpi, msol_loc;
126167602552SJunchao Zhang   VecScatter         scat_sol;
126267602552SJunchao Zhang   Vec                b_seq;
126367602552SJunchao Zhang   VecScatter         scat_rhs;
1264be818407SHong Zhang   PetscScalar       *aa;
1265be818407SHong Zhang   PetscInt           spnr, *ia, *ja;
1266d56c302dSHong Zhang   Mat_MPIAIJ        *b = NULL;
1267bda8bf91SBarry Smith 
1268e0b74bf9SHong Zhang   PetscFunctionBegin;
12699566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
127028b400f6SJacob Faibussowitsch   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1271be818407SHong Zhang 
12729566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1273a6053eceSJunchao Zhang   if (denseB) {
127408401ef6SPierre Jolivet     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1275be818407SHong Zhang     mumps->id.ICNTL(20) = 0; /* dense RHS */
12760e6b8875SHong Zhang   } else {                   /* sparse B */
127708401ef6SPierre Jolivet     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1278013e2dc7SBarry Smith     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1279da81f932SPierre Jolivet     if (flgT) { /* input B is transpose of actual RHS matrix,
12800e6b8875SHong Zhang                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
12819566063dSJacob Faibussowitsch       PetscCall(MatTransposeGetMat(B, &Bt));
1282013e2dc7SBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1283be818407SHong Zhang     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1284b8491c3eSStefano Zampini   }
128587b22cf4SHong Zhang 
12869566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, &M, &nrhs));
12879481e6e9SHong Zhang   mumps->id.nrhs = nrhs;
12889481e6e9SHong Zhang   mumps->id.lrhs = M;
12892b691707SHong Zhang   mumps->id.rhs  = NULL;
12909481e6e9SHong Zhang 
12912d4298aeSJunchao Zhang   if (mumps->petsc_size == 1) {
1292b8491c3eSStefano Zampini     PetscScalar *aa;
1293b8491c3eSStefano Zampini     PetscInt     spnr, *ia, *ja;
1294e94cce23SStefano Zampini     PetscBool    second_solve = PETSC_FALSE;
1295b8491c3eSStefano Zampini 
12969566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(X, &array));
1297b8491c3eSStefano Zampini     mumps->id.rhs = (MumpsScalar *)array;
12982b691707SHong Zhang 
1299a6053eceSJunchao Zhang     if (denseB) {
13002b691707SHong Zhang       /* copy B to X */
13019566063dSJacob Faibussowitsch       PetscCall(MatDenseGetArrayRead(B, &rbray));
13029566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
13039566063dSJacob Faibussowitsch       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
13042b691707SHong Zhang     } else { /* sparse B */
13059566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJGetArray(Bt, &aa));
13069566063dSJacob Faibussowitsch       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
130728b400f6SJacob Faibussowitsch       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
13089566063dSJacob Faibussowitsch       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1309b8491c3eSStefano Zampini       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1310b8491c3eSStefano Zampini     }
1311e94cce23SStefano Zampini     /* handle condensation step of Schur complement (if any) */
1312583f777eSStefano Zampini     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1313e94cce23SStefano Zampini       second_solve = PETSC_TRUE;
13149566063dSJacob Faibussowitsch       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1315e94cce23SStefano Zampini     }
13162cd7d884SHong Zhang     /* solve phase */
13172cd7d884SHong Zhang     mumps->id.job = JOB_SOLVE;
13183ab56b82SJunchao Zhang     PetscMUMPS_c(mumps);
131908401ef6SPierre Jolivet     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1320b5fa320bSStefano Zampini 
1321b5fa320bSStefano Zampini     /* handle expansion step of Schur complement (if any) */
13221baa6e33SBarry Smith     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1323a6053eceSJunchao Zhang     if (!denseB) { /* sparse B */
13249566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
13259566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
132628b400f6SJacob Faibussowitsch       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1327b8491c3eSStefano Zampini     }
13289566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(X, &array));
13293ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1330be818407SHong Zhang   }
1331801fbe65SHong Zhang 
13322ef1f0ffSBarry Smith   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1333aed4548fSBarry Smith   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1334241dbb5eSStefano Zampini 
1335beae5ec0SHong Zhang   /* create msol_loc to hold mumps local solution */
13361683a169SBarry Smith   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
13371683a169SBarry Smith   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1338801fbe65SHong Zhang 
1339a1dfcbd9SJunchao Zhang   lsol_loc  = mumps->id.lsol_loc;
134071aed81dSHong Zhang   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
13419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1342940cd9d6SSatish Balay   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1343801fbe65SHong Zhang   mumps->id.isol_loc = isol_loc;
1344801fbe65SHong Zhang 
13459566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
13462cd7d884SHong Zhang 
134767602552SJunchao Zhang   if (denseB) {
134825aac85cSJunchao Zhang     if (mumps->ICNTL20 == 10) {
134967602552SJunchao Zhang       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
13509566063dSJacob Faibussowitsch       PetscCall(MatDenseGetArrayRead(B, &rbray));
13519566063dSJacob Faibussowitsch       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
13529566063dSJacob Faibussowitsch       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
13539566063dSJacob Faibussowitsch       PetscCall(MatGetLocalSize(B, &m, NULL));
13549566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
135525aac85cSJunchao Zhang     } else {
135625aac85cSJunchao Zhang       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
135780577c12SJunchao Zhang       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
135880577c12SJunchao Zhang         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
135980577c12SJunchao Zhang         0, re-arrange B into desired order, which is a local operation.
136080577c12SJunchao Zhang       */
136180577c12SJunchao Zhang 
136267602552SJunchao Zhang       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1363be818407SHong Zhang       /* wrap dense rhs matrix B into a vector v_mpi */
13649566063dSJacob Faibussowitsch       PetscCall(MatGetLocalSize(B, &m, NULL));
13659566063dSJacob Faibussowitsch       PetscCall(MatDenseGetArray(B, &bray));
13669566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
13679566063dSJacob Faibussowitsch       PetscCall(MatDenseRestoreArray(B, &bray));
13682b691707SHong Zhang 
1369be818407SHong Zhang       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1370801fbe65SHong Zhang       if (!mumps->myid) {
1371beae5ec0SHong Zhang         PetscInt *idx;
1372beae5ec0SHong Zhang         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
13739566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nrhs * M, &idx));
13749566063dSJacob Faibussowitsch         PetscCall(MatGetOwnershipRanges(B, &rstart));
1375be818407SHong Zhang         k = 0;
13762d4298aeSJunchao Zhang         for (proc = 0; proc < mumps->petsc_size; proc++) {
1377be818407SHong Zhang           for (j = 0; j < nrhs; j++) {
1378beae5ec0SHong Zhang             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1379be818407SHong Zhang           }
1380be818407SHong Zhang         }
1381be818407SHong Zhang 
13829566063dSJacob Faibussowitsch         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
13839566063dSJacob Faibussowitsch         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
13849566063dSJacob Faibussowitsch         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1385801fbe65SHong Zhang       } else {
13869566063dSJacob Faibussowitsch         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
13879566063dSJacob Faibussowitsch         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
13889566063dSJacob Faibussowitsch         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1389801fbe65SHong Zhang       }
13909566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
13919566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
13929566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&is_to));
13939566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&is_from));
13949566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1395801fbe65SHong Zhang 
1396801fbe65SHong Zhang       if (!mumps->myid) { /* define rhs on the host */
13979566063dSJacob Faibussowitsch         PetscCall(VecGetArray(b_seq, &bray));
1398940cd9d6SSatish Balay         mumps->id.rhs = (MumpsScalar *)bray;
13999566063dSJacob Faibussowitsch         PetscCall(VecRestoreArray(b_seq, &bray));
1400801fbe65SHong Zhang       }
140125aac85cSJunchao Zhang     }
14022b691707SHong Zhang   } else { /* sparse B */
14032b691707SHong Zhang     b = (Mat_MPIAIJ *)Bt->data;
14042b691707SHong Zhang 
1405be818407SHong Zhang     /* wrap dense X into a vector v_mpi */
14069566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(X, &m, NULL));
14079566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(X, &bray));
14089566063dSJacob Faibussowitsch     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
14099566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(X, &bray));
14102b691707SHong Zhang 
14112b691707SHong Zhang     if (!mumps->myid) {
14129566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJGetArray(b->A, &aa));
14139566063dSJacob Faibussowitsch       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
141428b400f6SJacob Faibussowitsch       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
14159566063dSJacob Faibussowitsch       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
14162b691707SHong Zhang       mumps->id.rhs_sparse = (MumpsScalar *)aa;
14172b691707SHong Zhang     } else {
14182b691707SHong Zhang       mumps->id.irhs_ptr    = NULL;
14192b691707SHong Zhang       mumps->id.irhs_sparse = NULL;
14202b691707SHong Zhang       mumps->id.nz_rhs      = 0;
14212b691707SHong Zhang       mumps->id.rhs_sparse  = NULL;
14222b691707SHong Zhang     }
14232b691707SHong Zhang   }
14242b691707SHong Zhang 
1425801fbe65SHong Zhang   /* solve phase */
1426801fbe65SHong Zhang   mumps->id.job = JOB_SOLVE;
14273ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
142808401ef6SPierre Jolivet   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1429801fbe65SHong Zhang 
1430334c5f61SHong Zhang   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
14319566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &array));
14329566063dSJacob Faibussowitsch   PetscCall(VecPlaceArray(v_mpi, array));
1433801fbe65SHong Zhang 
1434334c5f61SHong Zhang   /* create scatter scat_sol */
14359566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(X, &rstart));
1436beae5ec0SHong Zhang   /* iidx: index for scatter mumps solution to petsc X */
1437beae5ec0SHong Zhang 
14389566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
14399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1440beae5ec0SHong Zhang   for (i = 0; i < lsol_loc; i++) {
1441beae5ec0SHong Zhang     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1442beae5ec0SHong Zhang 
14432d4298aeSJunchao Zhang     for (proc = 0; proc < mumps->petsc_size; proc++) {
1444beae5ec0SHong Zhang       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1445beae5ec0SHong Zhang         myrstart = rstart[proc];
1446beae5ec0SHong Zhang         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1447beae5ec0SHong Zhang         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1448beae5ec0SHong Zhang         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1449beae5ec0SHong Zhang         break;
1450be818407SHong Zhang       }
1451be818407SHong Zhang     }
1452be818407SHong Zhang 
1453beae5ec0SHong Zhang     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1454801fbe65SHong Zhang   }
14559566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
14569566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
14579566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
14589566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&is_from));
14599566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&is_to));
14609566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
14619566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &array));
146271aed81dSHong Zhang 
146371aed81dSHong Zhang   /* free spaces */
14641683a169SBarry Smith   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
146571aed81dSHong Zhang   mumps->id.isol_loc = isol_loc_save;
146671aed81dSHong Zhang 
14679566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sol_loc, isol_loc));
14689566063dSJacob Faibussowitsch   PetscCall(PetscFree(idxx));
14699566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&msol_loc));
14709566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&v_mpi));
1471a6053eceSJunchao Zhang   if (!denseB) {
14722b691707SHong Zhang     if (!mumps->myid) {
1473d56c302dSHong Zhang       b = (Mat_MPIAIJ *)Bt->data;
14749566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
14759566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
147628b400f6SJacob Faibussowitsch       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
14772b691707SHong Zhang     }
14782b691707SHong Zhang   } else {
147925aac85cSJunchao Zhang     if (mumps->ICNTL20 == 0) {
14809566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&b_seq));
14819566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&scat_rhs));
148225aac85cSJunchao Zhang     }
14832b691707SHong Zhang   }
14849566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&scat_sol));
14859566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
14863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1487e0b74bf9SHong Zhang }
1488e0b74bf9SHong Zhang 
1489d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1490d71ae5a4SJacob Faibussowitsch {
1491b18964edSHong Zhang   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1492*338d3105SPierre Jolivet   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1493b18964edSHong Zhang 
1494b18964edSHong Zhang   PetscFunctionBegin;
1495b18964edSHong Zhang   mumps->id.ICNTL(9) = 0;
1496b18964edSHong Zhang   PetscCall(MatMatSolve_MUMPS(A, B, X));
1497*338d3105SPierre Jolivet   mumps->id.ICNTL(9) = value;
14983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1499b18964edSHong Zhang }
1500b18964edSHong Zhang 
1501d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1502d71ae5a4SJacob Faibussowitsch {
1503eb3ef3b2SHong Zhang   PetscBool flg;
1504eb3ef3b2SHong Zhang   Mat       B;
1505eb3ef3b2SHong Zhang 
1506eb3ef3b2SHong Zhang   PetscFunctionBegin;
15079566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
150828b400f6SJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1509eb3ef3b2SHong Zhang 
1510eb3ef3b2SHong Zhang   /* Create B=Bt^T that uses Bt's data structure */
15119566063dSJacob Faibussowitsch   PetscCall(MatCreateTranspose(Bt, &B));
1512eb3ef3b2SHong Zhang 
15139566063dSJacob Faibussowitsch   PetscCall(MatMatSolve_MUMPS(A, B, X));
15149566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
15153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1516eb3ef3b2SHong Zhang }
1517eb3ef3b2SHong Zhang 
1518ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX)
1519a58c3f20SHong Zhang /*
1520a58c3f20SHong Zhang   input:
1521a58c3f20SHong Zhang    F:        numeric factor
1522a58c3f20SHong Zhang   output:
1523a58c3f20SHong Zhang    nneg:     total number of negative pivots
152419d49a3bSHong Zhang    nzero:    total number of zero pivots
152519d49a3bSHong Zhang    npos:     (global dimension of F) - nneg - nzero
1526a58c3f20SHong Zhang */
1527d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1528d71ae5a4SJacob Faibussowitsch {
1529e69c285eSBarry Smith   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1530c1490034SHong Zhang   PetscMPIInt size;
1531a58c3f20SHong Zhang 
1532a58c3f20SHong Zhang   PetscFunctionBegin;
15339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1534bcb30aebSHong Zhang   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1535aed4548fSBarry Smith   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
1536ed85ac9fSHong Zhang 
1537710ac8efSHong Zhang   if (nneg) *nneg = mumps->id.INFOG(12);
1538ed85ac9fSHong Zhang   if (nzero || npos) {
153908401ef6SPierre Jolivet     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1540710ac8efSHong Zhang     if (nzero) *nzero = mumps->id.INFOG(28);
1541710ac8efSHong Zhang     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1542a58c3f20SHong Zhang   }
15433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1544a58c3f20SHong Zhang }
154519d49a3bSHong Zhang #endif
1546a58c3f20SHong Zhang 
1547d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1548d71ae5a4SJacob Faibussowitsch {
1549a6053eceSJunchao Zhang   PetscInt       i, nreqs;
1550a6053eceSJunchao Zhang   PetscMUMPSInt *irn, *jcn;
1551a6053eceSJunchao Zhang   PetscMPIInt    count;
1552a6053eceSJunchao Zhang   PetscInt64     totnnz, remain;
1553a6053eceSJunchao Zhang   const PetscInt osize = mumps->omp_comm_size;
1554a6053eceSJunchao Zhang   PetscScalar   *val;
15553ab56b82SJunchao Zhang 
15563ab56b82SJunchao Zhang   PetscFunctionBegin;
1557a6053eceSJunchao Zhang   if (osize > 1) {
15583ab56b82SJunchao Zhang     if (reuse == MAT_INITIAL_MATRIX) {
15593ab56b82SJunchao Zhang       /* master first gathers counts of nonzeros to receive */
15609566063dSJacob Faibussowitsch       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
15619566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
15623ab56b82SJunchao Zhang 
1563a6053eceSJunchao Zhang       /* Then each computes number of send/recvs */
15643ab56b82SJunchao Zhang       if (mumps->is_omp_master) {
1565a6053eceSJunchao Zhang         /* Start from 1 since self communication is not done in MPI */
1566a6053eceSJunchao Zhang         nreqs = 0;
1567a6053eceSJunchao Zhang         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1568a6053eceSJunchao Zhang       } else {
1569a6053eceSJunchao Zhang         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
15703ab56b82SJunchao Zhang       }
157135cb6cd3SPierre Jolivet       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
15723ab56b82SJunchao Zhang 
1573a6053eceSJunchao Zhang       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1574a6053eceSJunchao Zhang          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
1575a6053eceSJunchao Zhang          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1576a6053eceSJunchao Zhang          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1577a6053eceSJunchao Zhang        */
1578a6053eceSJunchao Zhang       nreqs = 0; /* counter for actual send/recvs */
15793ab56b82SJunchao Zhang       if (mumps->is_omp_master) {
1580a6053eceSJunchao Zhang         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
15819566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
15829566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(totnnz, &val));
1583a6053eceSJunchao Zhang 
1584a6053eceSJunchao Zhang         /* Self communication */
15859566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
15869566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
15879566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1588a6053eceSJunchao Zhang 
1589a6053eceSJunchao Zhang         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
15909566063dSJacob Faibussowitsch         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
15919566063dSJacob Faibussowitsch         PetscCall(PetscFree(mumps->val_alloc));
1592a6053eceSJunchao Zhang         mumps->nnz = totnnz;
15933ab56b82SJunchao Zhang         mumps->irn = irn;
15943ab56b82SJunchao Zhang         mumps->jcn = jcn;
1595a6053eceSJunchao Zhang         mumps->val = mumps->val_alloc = val;
1596a6053eceSJunchao Zhang 
1597a6053eceSJunchao Zhang         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1598a6053eceSJunchao Zhang         jcn += mumps->recvcount[0];
1599a6053eceSJunchao Zhang         val += mumps->recvcount[0];
1600a6053eceSJunchao Zhang 
1601a6053eceSJunchao Zhang         /* Remote communication */
1602a6053eceSJunchao Zhang         for (i = 1; i < osize; i++) {
1603a6053eceSJunchao Zhang           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1604a6053eceSJunchao Zhang           remain = mumps->recvcount[i] - count;
1605a6053eceSJunchao Zhang           while (count > 0) {
16069566063dSJacob Faibussowitsch             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
16079566063dSJacob Faibussowitsch             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
16089566063dSJacob Faibussowitsch             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1609a6053eceSJunchao Zhang             irn += count;
1610a6053eceSJunchao Zhang             jcn += count;
1611a6053eceSJunchao Zhang             val += count;
1612a6053eceSJunchao Zhang             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1613a6053eceSJunchao Zhang             remain -= count;
1614a6053eceSJunchao Zhang           }
16153ab56b82SJunchao Zhang         }
16163ab56b82SJunchao Zhang       } else {
1617a6053eceSJunchao Zhang         irn    = mumps->irn;
1618a6053eceSJunchao Zhang         jcn    = mumps->jcn;
1619a6053eceSJunchao Zhang         val    = mumps->val;
1620a6053eceSJunchao Zhang         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1621a6053eceSJunchao Zhang         remain = mumps->nnz - count;
1622a6053eceSJunchao Zhang         while (count > 0) {
16239566063dSJacob Faibussowitsch           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
16249566063dSJacob Faibussowitsch           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
16259566063dSJacob Faibussowitsch           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1626a6053eceSJunchao Zhang           irn += count;
1627a6053eceSJunchao Zhang           jcn += count;
1628a6053eceSJunchao Zhang           val += count;
1629a6053eceSJunchao Zhang           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1630a6053eceSJunchao Zhang           remain -= count;
16313ab56b82SJunchao Zhang         }
16323ab56b82SJunchao Zhang       }
1633a6053eceSJunchao Zhang     } else {
1634a6053eceSJunchao Zhang       nreqs = 0;
1635a6053eceSJunchao Zhang       if (mumps->is_omp_master) {
1636a6053eceSJunchao Zhang         val = mumps->val + mumps->recvcount[0];
1637a6053eceSJunchao Zhang         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1638a6053eceSJunchao Zhang           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1639a6053eceSJunchao Zhang           remain = mumps->recvcount[i] - count;
1640a6053eceSJunchao Zhang           while (count > 0) {
16419566063dSJacob Faibussowitsch             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1642a6053eceSJunchao Zhang             val += count;
1643a6053eceSJunchao Zhang             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1644a6053eceSJunchao Zhang             remain -= count;
1645a6053eceSJunchao Zhang           }
1646a6053eceSJunchao Zhang         }
1647a6053eceSJunchao Zhang       } else {
1648a6053eceSJunchao Zhang         val    = mumps->val;
1649a6053eceSJunchao Zhang         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1650a6053eceSJunchao Zhang         remain = mumps->nnz - count;
1651a6053eceSJunchao Zhang         while (count > 0) {
16529566063dSJacob Faibussowitsch           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1653a6053eceSJunchao Zhang           val += count;
1654a6053eceSJunchao Zhang           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1655a6053eceSJunchao Zhang           remain -= count;
1656a6053eceSJunchao Zhang         }
1657a6053eceSJunchao Zhang       }
1658a6053eceSJunchao Zhang     }
16599566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1660a6053eceSJunchao Zhang     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
1661a6053eceSJunchao Zhang   }
16623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16633ab56b82SJunchao Zhang }
16643ab56b82SJunchao Zhang 
1665d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1666d71ae5a4SJacob Faibussowitsch {
1667e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1668ace3abfcSBarry Smith   PetscBool  isMPIAIJ;
1669397b6df1SKris Buschelman 
1670397b6df1SKris Buschelman   PetscFunctionBegin;
1671dbf6bb8dSprj-   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
167248a46eb9SPierre Jolivet     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
16739566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
16743ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
16752aca8efcSHong Zhang   }
16766baea169SHong Zhang 
16779566063dSJacob Faibussowitsch   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
16789566063dSJacob Faibussowitsch   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1679397b6df1SKris Buschelman 
1680397b6df1SKris Buschelman   /* numerical factorization phase */
1681a5e57a09SHong Zhang   mumps->id.job = JOB_FACTNUMERIC;
16824e34a73bSHong Zhang   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1683ad540459SPierre Jolivet     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1684397b6df1SKris Buschelman   } else {
1685940cd9d6SSatish Balay     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1686397b6df1SKris Buschelman   }
16873ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
1688a5e57a09SHong Zhang   if (mumps->id.INFOG(1) < 0) {
16897a46b595SBarry Smith     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1690c0d63f2fSHong Zhang     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
16919566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1692603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1693c0d63f2fSHong Zhang     } else if (mumps->id.INFOG(1) == -13) {
16949566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1695603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1696c0d63f2fSHong Zhang     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
16979566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1698603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
16992aca8efcSHong Zhang     } else {
17009566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1701603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OTHER;
1702151787a6SHong Zhang     }
17032aca8efcSHong Zhang   }
1704aed4548fSBarry Smith   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "  mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16));
1705397b6df1SKris Buschelman 
1706b3cb21ddSStefano Zampini   F->assembled = PETSC_TRUE;
1707d47f36abSHong Zhang 
1708b3cb21ddSStefano Zampini   if (F->schur) { /* reset Schur status to unfactored */
17093cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
1710c70f7ee4SJunchao Zhang     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
17113cb7dd0eSStefano Zampini #endif
1712b3cb21ddSStefano Zampini     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1713b3cb21ddSStefano Zampini       mumps->id.ICNTL(19) = 2;
17149566063dSJacob Faibussowitsch       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1715b3cb21ddSStefano Zampini     }
17169566063dSJacob Faibussowitsch     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1717b3cb21ddSStefano Zampini   }
171867877ebaSShri Abhyankar 
1719066565c5SStefano Zampini   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1720066565c5SStefano Zampini   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1721066565c5SStefano Zampini 
17223ab56b82SJunchao Zhang   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
17232d4298aeSJunchao Zhang   if (mumps->petsc_size > 1) {
172467877ebaSShri Abhyankar     PetscInt     lsol_loc;
172567877ebaSShri Abhyankar     PetscScalar *sol_loc;
17262205254eSKarl Rupp 
17279566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1728c2093ab7SHong Zhang 
1729c2093ab7SHong Zhang     /* distributed solution; Create x_seq=sol_loc for repeated use */
1730c2093ab7SHong Zhang     if (mumps->x_seq) {
17319566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&mumps->scat_sol));
17329566063dSJacob Faibussowitsch       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
17339566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&mumps->x_seq));
1734c2093ab7SHong Zhang     }
1735a5e57a09SHong Zhang     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
17369566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1737a5e57a09SHong Zhang     mumps->id.lsol_loc = lsol_loc;
1738940cd9d6SSatish Balay     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
17399566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
174067877ebaSShri Abhyankar   }
17419566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
17423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1743397b6df1SKris Buschelman }
1744397b6df1SKris Buschelman 
17459a2535b5SHong Zhang /* Sets MUMPS options from the options database */
1746d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1747d71ae5a4SJacob Faibussowitsch {
1748e69c285eSBarry Smith   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1749413bcc21SPierre Jolivet   PetscMUMPSInt icntl = 0, size, *listvar_schur;
175045e3843bSPierre Jolivet   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1751413bcc21SPierre Jolivet   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1752413bcc21SPierre Jolivet   MumpsScalar  *arr;
1753dcd589f8SShri Abhyankar 
1754dcd589f8SShri Abhyankar   PetscFunctionBegin;
175526cc229bSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1756413bcc21SPierre Jolivet   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1757413bcc21SPierre Jolivet     PetscInt nthreads   = 0;
1758413bcc21SPierre Jolivet     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1759413bcc21SPierre Jolivet     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1760413bcc21SPierre Jolivet 
1761413bcc21SPierre Jolivet     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1762413bcc21SPierre Jolivet     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1763413bcc21SPierre Jolivet     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1764413bcc21SPierre Jolivet 
1765413bcc21SPierre Jolivet     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1766413bcc21SPierre Jolivet     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1767413bcc21SPierre Jolivet     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1768413bcc21SPierre Jolivet     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1769413bcc21SPierre Jolivet     if (mumps->use_petsc_omp_support) {
17709371c9d4SSatish Balay       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
17719371c9d4SSatish Balay                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1772413bcc21SPierre Jolivet       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1773413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1774413bcc21SPierre Jolivet       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1775413bcc21SPierre Jolivet       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1776413bcc21SPierre Jolivet #endif
1777413bcc21SPierre Jolivet     } else {
1778413bcc21SPierre Jolivet       mumps->omp_comm      = PETSC_COMM_SELF;
1779413bcc21SPierre Jolivet       mumps->mumps_comm    = mumps->petsc_comm;
1780413bcc21SPierre Jolivet       mumps->is_omp_master = PETSC_TRUE;
1781413bcc21SPierre Jolivet     }
1782413bcc21SPierre Jolivet     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1783413bcc21SPierre Jolivet     mumps->reqs = NULL;
1784413bcc21SPierre Jolivet     mumps->tag  = 0;
1785413bcc21SPierre Jolivet 
1786413bcc21SPierre Jolivet     if (mumps->mumps_comm != MPI_COMM_NULL) {
1787413bcc21SPierre Jolivet       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1788413bcc21SPierre Jolivet         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1789413bcc21SPierre Jolivet         MPI_Comm comm;
1790413bcc21SPierre Jolivet         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1791413bcc21SPierre Jolivet         mumps->mumps_comm = comm;
1792413bcc21SPierre Jolivet       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1793413bcc21SPierre Jolivet     }
1794413bcc21SPierre Jolivet 
1795413bcc21SPierre Jolivet     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1796413bcc21SPierre Jolivet     mumps->id.job          = JOB_INIT;
1797413bcc21SPierre Jolivet     mumps->id.par          = 1; /* host participates factorizaton and solve */
1798413bcc21SPierre Jolivet     mumps->id.sym          = mumps->sym;
1799413bcc21SPierre Jolivet 
1800413bcc21SPierre Jolivet     size          = mumps->id.size_schur;
1801413bcc21SPierre Jolivet     arr           = mumps->id.schur;
1802413bcc21SPierre Jolivet     listvar_schur = mumps->id.listvar_schur;
1803413bcc21SPierre Jolivet     PetscMUMPS_c(mumps);
1804413bcc21SPierre Jolivet     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1805413bcc21SPierre Jolivet     /* restore cached ICNTL and CNTL values */
1806413bcc21SPierre Jolivet     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1807413bcc21SPierre Jolivet     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1808413bcc21SPierre Jolivet     PetscCall(PetscFree(mumps->ICNTL_pre));
1809413bcc21SPierre Jolivet     PetscCall(PetscFree(mumps->CNTL_pre));
1810413bcc21SPierre Jolivet 
1811413bcc21SPierre Jolivet     if (schur) {
1812413bcc21SPierre Jolivet       mumps->id.size_schur    = size;
1813413bcc21SPierre Jolivet       mumps->id.schur_lld     = size;
1814413bcc21SPierre Jolivet       mumps->id.schur         = arr;
1815413bcc21SPierre Jolivet       mumps->id.listvar_schur = listvar_schur;
1816413bcc21SPierre Jolivet       if (mumps->petsc_size > 1) {
1817413bcc21SPierre Jolivet         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1818413bcc21SPierre Jolivet 
1819413bcc21SPierre Jolivet         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1820413bcc21SPierre Jolivet         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1821712fec58SPierre Jolivet         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1822413bcc21SPierre Jolivet         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1823413bcc21SPierre Jolivet       } else {
1824413bcc21SPierre Jolivet         if (F->factortype == MAT_FACTOR_LU) {
1825413bcc21SPierre Jolivet           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1826413bcc21SPierre Jolivet         } else {
1827413bcc21SPierre Jolivet           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1828413bcc21SPierre Jolivet         }
1829413bcc21SPierre Jolivet       }
1830413bcc21SPierre Jolivet       mumps->id.ICNTL(26) = -1;
1831413bcc21SPierre Jolivet     }
1832413bcc21SPierre Jolivet 
1833413bcc21SPierre Jolivet     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1834413bcc21SPierre Jolivet        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1835413bcc21SPierre Jolivet      */
1836413bcc21SPierre Jolivet     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1837413bcc21SPierre Jolivet     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1838413bcc21SPierre Jolivet 
1839413bcc21SPierre Jolivet     mumps->scat_rhs = NULL;
1840413bcc21SPierre Jolivet     mumps->scat_sol = NULL;
1841413bcc21SPierre Jolivet 
1842413bcc21SPierre Jolivet     /* set PETSc-MUMPS default options - override MUMPS default */
1843413bcc21SPierre Jolivet     mumps->id.ICNTL(3) = 0;
1844413bcc21SPierre Jolivet     mumps->id.ICNTL(4) = 0;
1845413bcc21SPierre Jolivet     if (mumps->petsc_size == 1) {
1846413bcc21SPierre Jolivet       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1847413bcc21SPierre Jolivet       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1848413bcc21SPierre Jolivet     } else {
1849413bcc21SPierre Jolivet       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1850413bcc21SPierre Jolivet       mumps->id.ICNTL(21) = 1; /* distributed solution */
1851413bcc21SPierre Jolivet     }
1852413bcc21SPierre Jolivet   }
18539566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
18549a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(1) = icntl;
18559566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
18569a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(2) = icntl;
18579566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
18589a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(3) = icntl;
1859dcd589f8SShri Abhyankar 
18609566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
18619a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(4) = icntl;
18629a2535b5SHong Zhang   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
18639a2535b5SHong Zhang 
18649566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
18659a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(6) = icntl;
18669a2535b5SHong Zhang 
18679566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
1868dcd589f8SShri Abhyankar   if (flg) {
1869aed4548fSBarry Smith     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1870b53c1a7fSBarry Smith     mumps->id.ICNTL(7) = icntl;
1871dcd589f8SShri Abhyankar   }
1872e0b74bf9SHong Zhang 
18739566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
18749566063dSJacob Faibussowitsch   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
18759566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
18769566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
18779566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
18789566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
18799566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
188045e3843bSPierre Jolivet   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
188145e3843bSPierre Jolivet   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
188245e3843bSPierre Jolivet   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
188345e3843bSPierre Jolivet   if (flg) {
188445e3843bSPierre Jolivet     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
188545e3843bSPierre Jolivet     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
188645e3843bSPierre Jolivet   }
18879566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
188859ac8732SStefano Zampini   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
18899566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&F->schur));
18909566063dSJacob Faibussowitsch     PetscCall(MatMumpsResetSchur_Private(mumps));
189159ac8732SStefano Zampini   }
189225aac85cSJunchao Zhang 
189343f3b051SJunchao Zhang   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
189443f3b051SJunchao Zhang      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
189525aac85cSJunchao Zhang      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
189643f3b051SJunchao Zhang      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
189743f3b051SJunchao Zhang      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
189843f3b051SJunchao Zhang      In short, we could not use distributed RHS with MPICH until v4.0b1.
189925aac85cSJunchao Zhang    */
190043f3b051SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
190125aac85cSJunchao Zhang   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
190243f3b051SJunchao Zhang #else
190343f3b051SJunchao Zhang   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
190425aac85cSJunchao Zhang #endif
19059566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1906aed4548fSBarry Smith   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
190725aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1908aed4548fSBarry Smith   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
190925aac85cSJunchao Zhang #endif
19109566063dSJacob Faibussowitsch   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
19119a2535b5SHong Zhang 
19129566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
19139566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
19149566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
19159371c9d4SSatish Balay   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
1916d7ebd59bSHong Zhang 
19179566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
19189566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
19199566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
19209566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
19219566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
19229566063dSJacob Faibussowitsch   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
19239566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
19249566063dSJacob Faibussowitsch   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
19259566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
19269566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
19279566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
19289566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1929dcd589f8SShri Abhyankar 
19309566063dSJacob Faibussowitsch   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
19319566063dSJacob Faibussowitsch   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
19329566063dSJacob Faibussowitsch   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
19339566063dSJacob Faibussowitsch   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
19349566063dSJacob Faibussowitsch   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
19359566063dSJacob Faibussowitsch   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1936e5bb22a1SHong Zhang 
19379566063dSJacob Faibussowitsch   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1938b34f08ffSHong Zhang 
19399566063dSJacob Faibussowitsch   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1940b34f08ffSHong Zhang   if (ninfo) {
194108401ef6SPierre Jolivet     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
19429566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1943b34f08ffSHong Zhang     mumps->ninfo = ninfo;
1944b34f08ffSHong Zhang     for (i = 0; i < ninfo; i++) {
1945aed4548fSBarry Smith       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
1946f7d195e4SLawrence Mitchell       mumps->info[i] = info[i];
1947b34f08ffSHong Zhang     }
1948b34f08ffSHong Zhang   }
1949d0609cedSBarry Smith   PetscOptionsEnd();
19503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1951dcd589f8SShri Abhyankar }
1952dcd589f8SShri Abhyankar 
1953d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1954d71ae5a4SJacob Faibussowitsch {
19555cd7cf9dSHong Zhang   PetscFunctionBegin;
19565cd7cf9dSHong Zhang   if (mumps->id.INFOG(1) < 0) {
19577a46b595SBarry Smith     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
19585cd7cf9dSHong Zhang     if (mumps->id.INFOG(1) == -6) {
19599566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1960603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
19615cd7cf9dSHong Zhang     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
19629566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1963603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1964dbf6bb8dSprj-     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
19659566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "Empty matrix\n"));
19665cd7cf9dSHong Zhang     } else {
19679566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1968603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OTHER;
19695cd7cf9dSHong Zhang     }
19705cd7cf9dSHong Zhang   }
19713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19725cd7cf9dSHong Zhang }
19735cd7cf9dSHong Zhang 
1974d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
1975d71ae5a4SJacob Faibussowitsch {
1976e69c285eSBarry Smith   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
197767877ebaSShri Abhyankar   Vec            b;
197867877ebaSShri Abhyankar   const PetscInt M = A->rmap->N;
1979397b6df1SKris Buschelman 
1980397b6df1SKris Buschelman   PetscFunctionBegin;
1981d47f36abSHong Zhang   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1982d47f36abSHong Zhang     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
19833ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1984d47f36abSHong Zhang   }
1985dcd589f8SShri Abhyankar 
19869a2535b5SHong Zhang   /* Set MUMPS options from the options database */
198726cc229bSBarry Smith   PetscCall(MatSetFromOptions_MUMPS(F, A));
1988dcd589f8SShri Abhyankar 
19899566063dSJacob Faibussowitsch   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
19909566063dSJacob Faibussowitsch   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
1991dcd589f8SShri Abhyankar 
199267877ebaSShri Abhyankar   /* analysis phase */
1993a5e57a09SHong Zhang   mumps->id.job = JOB_FACTSYMBOLIC;
1994a5e57a09SHong Zhang   mumps->id.n   = M;
1995a5e57a09SHong Zhang   switch (mumps->id.ICNTL(18)) {
199667877ebaSShri Abhyankar   case 0: /* centralized assembled matrix input */
1997a5e57a09SHong Zhang     if (!mumps->myid) {
1998a6053eceSJunchao Zhang       mumps->id.nnz = mumps->nnz;
1999a6053eceSJunchao Zhang       mumps->id.irn = mumps->irn;
2000a6053eceSJunchao Zhang       mumps->id.jcn = mumps->jcn;
2001a6053eceSJunchao Zhang       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
20024ac6704cSBarry Smith       if (r) {
20034ac6704cSBarry Smith         mumps->id.ICNTL(7) = 1;
2004a5e57a09SHong Zhang         if (!mumps->myid) {
2005e0b74bf9SHong Zhang           const PetscInt *idx;
2006a6053eceSJunchao Zhang           PetscInt        i;
20072205254eSKarl Rupp 
20089566063dSJacob Faibussowitsch           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
20099566063dSJacob Faibussowitsch           PetscCall(ISGetIndices(r, &idx));
20109566063dSJacob Faibussowitsch           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
20119566063dSJacob Faibussowitsch           PetscCall(ISRestoreIndices(r, &idx));
2012e0b74bf9SHong Zhang         }
2013e0b74bf9SHong Zhang       }
201467877ebaSShri Abhyankar     }
201567877ebaSShri Abhyankar     break;
201667877ebaSShri Abhyankar   case 3: /* distributed assembled matrix input (size>1) */
2017a6053eceSJunchao Zhang     mumps->id.nnz_loc = mumps->nnz;
2018a6053eceSJunchao Zhang     mumps->id.irn_loc = mumps->irn;
2019a6053eceSJunchao Zhang     mumps->id.jcn_loc = mumps->jcn;
2020a6053eceSJunchao Zhang     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
202125aac85cSJunchao Zhang     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
20229566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(A, NULL, &b));
20239566063dSJacob Faibussowitsch       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
20249566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&b));
202525aac85cSJunchao Zhang     }
202667877ebaSShri Abhyankar     break;
202767877ebaSShri Abhyankar   }
20283ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
20299566063dSJacob Faibussowitsch   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
203067877ebaSShri Abhyankar 
2031719d5645SBarry Smith   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2032dcd589f8SShri Abhyankar   F->ops->solve             = MatSolve_MUMPS;
203351d5961aSHong Zhang   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
20344e34a73bSHong Zhang   F->ops->matsolve          = MatMatSolve_MUMPS;
2035eb3ef3b2SHong Zhang   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2036b18964edSHong Zhang   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2037d47f36abSHong Zhang 
2038d47f36abSHong Zhang   mumps->matstruc = SAME_NONZERO_PATTERN;
20393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2040b24902e0SBarry Smith }
2041b24902e0SBarry Smith 
2042450b117fSShri Abhyankar /* Note the Petsc r and c permutations are ignored */
2043d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2044d71ae5a4SJacob Faibussowitsch {
2045e69c285eSBarry Smith   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
204667877ebaSShri Abhyankar   Vec            b;
204767877ebaSShri Abhyankar   const PetscInt M = A->rmap->N;
2048450b117fSShri Abhyankar 
2049450b117fSShri Abhyankar   PetscFunctionBegin;
2050d47f36abSHong Zhang   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2051*338d3105SPierre Jolivet     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
20523ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2053d47f36abSHong Zhang   }
2054dcd589f8SShri Abhyankar 
20559a2535b5SHong Zhang   /* Set MUMPS options from the options database */
205626cc229bSBarry Smith   PetscCall(MatSetFromOptions_MUMPS(F, A));
2057dcd589f8SShri Abhyankar 
20589566063dSJacob Faibussowitsch   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
20599566063dSJacob Faibussowitsch   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
206067877ebaSShri Abhyankar 
206167877ebaSShri Abhyankar   /* analysis phase */
2062a5e57a09SHong Zhang   mumps->id.job = JOB_FACTSYMBOLIC;
2063a5e57a09SHong Zhang   mumps->id.n   = M;
2064a5e57a09SHong Zhang   switch (mumps->id.ICNTL(18)) {
206567877ebaSShri Abhyankar   case 0: /* centralized assembled matrix input */
2066a5e57a09SHong Zhang     if (!mumps->myid) {
2067a6053eceSJunchao Zhang       mumps->id.nnz = mumps->nnz;
2068a6053eceSJunchao Zhang       mumps->id.irn = mumps->irn;
2069a6053eceSJunchao Zhang       mumps->id.jcn = mumps->jcn;
2070ad540459SPierre Jolivet       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
207167877ebaSShri Abhyankar     }
207267877ebaSShri Abhyankar     break;
207367877ebaSShri Abhyankar   case 3: /* distributed assembled matrix input (size>1) */
2074a6053eceSJunchao Zhang     mumps->id.nnz_loc = mumps->nnz;
2075a6053eceSJunchao Zhang     mumps->id.irn_loc = mumps->irn;
2076a6053eceSJunchao Zhang     mumps->id.jcn_loc = mumps->jcn;
2077ad540459SPierre Jolivet     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
207825aac85cSJunchao Zhang     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
20799566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(A, NULL, &b));
20809566063dSJacob Faibussowitsch       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
20819566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&b));
208225aac85cSJunchao Zhang     }
208367877ebaSShri Abhyankar     break;
208467877ebaSShri Abhyankar   }
20853ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
20869566063dSJacob Faibussowitsch   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
208767877ebaSShri Abhyankar 
2088450b117fSShri Abhyankar   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2089dcd589f8SShri Abhyankar   F->ops->solve             = MatSolve_MUMPS;
209051d5961aSHong Zhang   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2091b18964edSHong Zhang   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2092d47f36abSHong Zhang 
2093d47f36abSHong Zhang   mumps->matstruc = SAME_NONZERO_PATTERN;
20943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2095450b117fSShri Abhyankar }
2096b24902e0SBarry Smith 
2097141f4205SHong Zhang /* Note the Petsc r permutation and factor info are ignored */
2098d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2099d71ae5a4SJacob Faibussowitsch {
2100e69c285eSBarry Smith   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
210167877ebaSShri Abhyankar   Vec            b;
210267877ebaSShri Abhyankar   const PetscInt M = A->rmap->N;
2103397b6df1SKris Buschelman 
2104397b6df1SKris Buschelman   PetscFunctionBegin;
2105d47f36abSHong Zhang   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2106*338d3105SPierre Jolivet     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
21073ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2108d47f36abSHong Zhang   }
2109dcd589f8SShri Abhyankar 
21109a2535b5SHong Zhang   /* Set MUMPS options from the options database */
211126cc229bSBarry Smith   PetscCall(MatSetFromOptions_MUMPS(F, A));
2112dcd589f8SShri Abhyankar 
21139566063dSJacob Faibussowitsch   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
21149566063dSJacob Faibussowitsch   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2115dcd589f8SShri Abhyankar 
211667877ebaSShri Abhyankar   /* analysis phase */
2117a5e57a09SHong Zhang   mumps->id.job = JOB_FACTSYMBOLIC;
2118a5e57a09SHong Zhang   mumps->id.n   = M;
2119a5e57a09SHong Zhang   switch (mumps->id.ICNTL(18)) {
212067877ebaSShri Abhyankar   case 0: /* centralized assembled matrix input */
2121a5e57a09SHong Zhang     if (!mumps->myid) {
2122a6053eceSJunchao Zhang       mumps->id.nnz = mumps->nnz;
2123a6053eceSJunchao Zhang       mumps->id.irn = mumps->irn;
2124a6053eceSJunchao Zhang       mumps->id.jcn = mumps->jcn;
2125ad540459SPierre Jolivet       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
212667877ebaSShri Abhyankar     }
212767877ebaSShri Abhyankar     break;
212867877ebaSShri Abhyankar   case 3: /* distributed assembled matrix input (size>1) */
2129a6053eceSJunchao Zhang     mumps->id.nnz_loc = mumps->nnz;
2130a6053eceSJunchao Zhang     mumps->id.irn_loc = mumps->irn;
2131a6053eceSJunchao Zhang     mumps->id.jcn_loc = mumps->jcn;
2132ad540459SPierre Jolivet     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
213325aac85cSJunchao Zhang     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
21349566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(A, NULL, &b));
21359566063dSJacob Faibussowitsch       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
21369566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&b));
213725aac85cSJunchao Zhang     }
213867877ebaSShri Abhyankar     break;
213967877ebaSShri Abhyankar   }
21403ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
21419566063dSJacob Faibussowitsch   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
21425cd7cf9dSHong Zhang 
21432792810eSHong Zhang   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2144dcd589f8SShri Abhyankar   F->ops->solve                 = MatSolve_MUMPS;
214551d5961aSHong Zhang   F->ops->solvetranspose        = MatSolve_MUMPS;
21464e34a73bSHong Zhang   F->ops->matsolve              = MatMatSolve_MUMPS;
214723a5080aSHong Zhang   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2148b18964edSHong Zhang   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
21494e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX)
21500298fd71SBarry Smith   F->ops->getinertia = NULL;
21514e34a73bSHong Zhang #else
21524e34a73bSHong Zhang   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2153db4efbfdSBarry Smith #endif
2154d47f36abSHong Zhang 
2155d47f36abSHong Zhang   mumps->matstruc = SAME_NONZERO_PATTERN;
21563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2157b24902e0SBarry Smith }
2158b24902e0SBarry Smith 
2159d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2160d71ae5a4SJacob Faibussowitsch {
216164e6c443SBarry Smith   PetscBool         iascii;
216264e6c443SBarry Smith   PetscViewerFormat format;
2163e69c285eSBarry Smith   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2164f6c57405SHong Zhang 
2165f6c57405SHong Zhang   PetscFunctionBegin;
216664e6c443SBarry Smith   /* check if matrix is mumps type */
21673ba16761SJacob Faibussowitsch   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
216864e6c443SBarry Smith 
21699566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
217064e6c443SBarry Smith   if (iascii) {
21719566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
21721511cd71SPierre Jolivet     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
21739566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
21741511cd71SPierre Jolivet       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
21759566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
21769566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
21779566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
21789566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
21799566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
21809566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
21819566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
21829566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
21839566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
21849566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
21859566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
21869566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2187a5e57a09SHong Zhang         if (mumps->id.ICNTL(11) > 0) {
21889566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
21899566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
21909566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
21919566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
21929566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
21939566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2194f6c57405SHong Zhang         }
21959566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
21969566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
21979566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
219845e3843bSPierre Jolivet         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2199f6c57405SHong Zhang         /* ICNTL(15-17) not used */
22009566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
22019566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
22029566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
22039566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
22049566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
22059566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2206c0165424SHong Zhang 
22079566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
22089566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
22099566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
22109566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
22119566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
22129566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
221342179a6aSHong Zhang 
22149566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
22159566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
22169566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
22179566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
22189566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
22199566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2220f6c57405SHong Zhang 
22219566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
22229566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
22239566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
22249566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
22259566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
22269566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2227f6c57405SHong Zhang 
2228a5b23f4aSJose E. Roman         /* information local to each processor */
22299566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
22309566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
22319566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
22329566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
22339566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
22349566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
22359566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
22369566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
22379566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
22389566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
2239f6c57405SHong Zhang 
22409566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
22419566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
22429566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
2243f6c57405SHong Zhang 
22449566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
22459566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
22469566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
2247f6c57405SHong Zhang 
22489566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
22499566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
22509566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
2251b34f08ffSHong Zhang 
2252a0e18203SThibaut Appel         if (mumps->ninfo && mumps->ninfo <= 80) {
2253b34f08ffSHong Zhang           PetscInt i;
2254b34f08ffSHong Zhang           for (i = 0; i < mumps->ninfo; i++) {
22559566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
22569566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
22579566063dSJacob Faibussowitsch             PetscCall(PetscViewerFlush(viewer));
2258b34f08ffSHong Zhang           }
2259b34f08ffSHong Zhang         }
22609566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
22611511cd71SPierre Jolivet       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2262f6c57405SHong Zhang 
22631511cd71SPierre Jolivet       if (mumps->myid == 0) { /* information from the host */
22649566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
22659566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
22669566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
22679566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2268f6c57405SHong Zhang 
22699566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
22709566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
22719566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
22729566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
22739566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
22749566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
22759566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
22769566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
22779566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
22789566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
22799566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
22809566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
22819566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
22829566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
22839566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
22849566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
22859566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
22869566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
22879566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
22889566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
22899566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
22909566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
22919566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
22929566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
22939566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
22949566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
22959566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
22969566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
22979566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
22989566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
22999566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
23009566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
23019566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
23029566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2303f6c57405SHong Zhang       }
2304f6c57405SHong Zhang     }
2305cb828f0fSHong Zhang   }
23063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2307f6c57405SHong Zhang }
2308f6c57405SHong Zhang 
2309d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2310d71ae5a4SJacob Faibussowitsch {
2311e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
231235bd34faSBarry Smith 
231335bd34faSBarry Smith   PetscFunctionBegin;
231435bd34faSBarry Smith   info->block_size        = 1.0;
2315cb828f0fSHong Zhang   info->nz_allocated      = mumps->id.INFOG(20);
2316cb828f0fSHong Zhang   info->nz_used           = mumps->id.INFOG(20);
231735bd34faSBarry Smith   info->nz_unneeded       = 0.0;
231835bd34faSBarry Smith   info->assemblies        = 0.0;
231935bd34faSBarry Smith   info->mallocs           = 0.0;
232035bd34faSBarry Smith   info->memory            = 0.0;
232135bd34faSBarry Smith   info->fill_ratio_given  = 0;
232235bd34faSBarry Smith   info->fill_ratio_needed = 0;
232335bd34faSBarry Smith   info->factor_mallocs    = 0;
23243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
232535bd34faSBarry Smith }
232635bd34faSBarry Smith 
2327d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2328d71ae5a4SJacob Faibussowitsch {
2329e69c285eSBarry Smith   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2330a3d589ffSStefano Zampini   const PetscScalar *arr;
23318e7ba810SStefano Zampini   const PetscInt    *idxs;
23328e7ba810SStefano Zampini   PetscInt           size, i;
23336444a565SStefano Zampini 
23346444a565SStefano Zampini   PetscFunctionBegin;
23359566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(is, &size));
2336b3cb21ddSStefano Zampini   /* Schur complement matrix */
23379566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&F->schur));
23389566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
23399566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2340a3d589ffSStefano Zampini   mumps->id.schur      = (MumpsScalar *)arr;
2341a3d589ffSStefano Zampini   mumps->id.size_schur = size;
2342a3d589ffSStefano Zampini   mumps->id.schur_lld  = size;
23439566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
234448a46eb9SPierre Jolivet   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2345b3cb21ddSStefano Zampini 
2346b3cb21ddSStefano Zampini   /* MUMPS expects Fortran style indices */
23479566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->id.listvar_schur));
23489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
23499566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(is, &idxs));
23509566063dSJacob Faibussowitsch   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
23519566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(is, &idxs));
235259ac8732SStefano Zampini   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
2353b5fa320bSStefano Zampini   mumps->id.ICNTL(26) = -1;
23543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
23556444a565SStefano Zampini }
235659ac8732SStefano Zampini 
2357d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2358d71ae5a4SJacob Faibussowitsch {
23596444a565SStefano Zampini   Mat          St;
2360e69c285eSBarry Smith   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
23616444a565SStefano Zampini   PetscScalar *array;
23626444a565SStefano Zampini #if defined(PETSC_USE_COMPLEX)
23638ac429a0SStefano Zampini   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
23646444a565SStefano Zampini #endif
23656444a565SStefano Zampini 
23666444a565SStefano Zampini   PetscFunctionBegin;
236708401ef6SPierre Jolivet   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
23689566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
23699566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
23709566063dSJacob Faibussowitsch   PetscCall(MatSetType(St, MATDENSE));
23719566063dSJacob Faibussowitsch   PetscCall(MatSetUp(St));
23729566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(St, &array));
237359ac8732SStefano Zampini   if (!mumps->sym) {                /* MUMPS always return a full matrix */
23746444a565SStefano Zampini     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
23756444a565SStefano Zampini       PetscInt i, j, N = mumps->id.size_schur;
23766444a565SStefano Zampini       for (i = 0; i < N; i++) {
23776444a565SStefano Zampini         for (j = 0; j < N; j++) {
23786444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX)
23796444a565SStefano Zampini           PetscScalar val = mumps->id.schur[i * N + j];
23806444a565SStefano Zampini #else
23816444a565SStefano Zampini           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
23826444a565SStefano Zampini #endif
23836444a565SStefano Zampini           array[j * N + i] = val;
23846444a565SStefano Zampini         }
23856444a565SStefano Zampini       }
23866444a565SStefano Zampini     } else { /* stored by columns */
23879566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
23886444a565SStefano Zampini     }
23896444a565SStefano Zampini   } else {                          /* either full or lower-triangular (not packed) */
23906444a565SStefano Zampini     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
23916444a565SStefano Zampini       PetscInt i, j, N = mumps->id.size_schur;
23926444a565SStefano Zampini       for (i = 0; i < N; i++) {
23936444a565SStefano Zampini         for (j = i; j < N; j++) {
23946444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX)
23956444a565SStefano Zampini           PetscScalar val = mumps->id.schur[i * N + j];
23966444a565SStefano Zampini #else
23976444a565SStefano Zampini           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
23986444a565SStefano Zampini #endif
23996444a565SStefano Zampini           array[i * N + j] = val;
24006444a565SStefano Zampini           array[j * N + i] = val;
24016444a565SStefano Zampini         }
24026444a565SStefano Zampini       }
24036444a565SStefano Zampini     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
24049566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
24056444a565SStefano Zampini     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
24066444a565SStefano Zampini       PetscInt i, j, N = mumps->id.size_schur;
24076444a565SStefano Zampini       for (i = 0; i < N; i++) {
24086444a565SStefano Zampini         for (j = 0; j < i + 1; j++) {
24096444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX)
24106444a565SStefano Zampini           PetscScalar val = mumps->id.schur[i * N + j];
24116444a565SStefano Zampini #else
24126444a565SStefano Zampini           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
24136444a565SStefano Zampini #endif
24146444a565SStefano Zampini           array[i * N + j] = val;
24156444a565SStefano Zampini           array[j * N + i] = val;
24166444a565SStefano Zampini         }
24176444a565SStefano Zampini       }
24186444a565SStefano Zampini     }
24196444a565SStefano Zampini   }
24209566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(St, &array));
24216444a565SStefano Zampini   *S = St;
24223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24236444a565SStefano Zampini }
24246444a565SStefano Zampini 
2425d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2426d71ae5a4SJacob Faibussowitsch {
2427e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
24285ccb76cbSHong Zhang 
24295ccb76cbSHong Zhang   PetscFunctionBegin;
2430413bcc21SPierre Jolivet   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2431413bcc21SPierre Jolivet     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
24329371c9d4SSatish Balay     for (i = 0; i < nICNTL_pre; ++i)
24339371c9d4SSatish Balay       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2434413bcc21SPierre Jolivet     if (i == nICNTL_pre) {                             /* not already cached */
2435413bcc21SPierre Jolivet       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2436413bcc21SPierre Jolivet       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2437413bcc21SPierre Jolivet       mumps->ICNTL_pre[0]++;
2438413bcc21SPierre Jolivet     }
2439413bcc21SPierre Jolivet     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2440413bcc21SPierre Jolivet     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2441413bcc21SPierre Jolivet   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
24423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24435ccb76cbSHong Zhang }
24445ccb76cbSHong Zhang 
2445d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2446d71ae5a4SJacob Faibussowitsch {
2447e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2448bc6112feSHong Zhang 
2449bc6112feSHong Zhang   PetscFunctionBegin;
245036df9881Sjeremy theler   if (mumps->id.job == JOB_NULL) {
245136df9881Sjeremy theler     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
245236df9881Sjeremy theler     *ival = 0;
245336df9881Sjeremy theler     for (i = 0; i < nICNTL_pre; ++i) {
245436df9881Sjeremy theler       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
245536df9881Sjeremy theler     }
245636df9881Sjeremy theler   } else *ival = mumps->id.ICNTL(icntl);
24573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2458bc6112feSHong Zhang }
2459bc6112feSHong Zhang 
24605ccb76cbSHong Zhang /*@
24615ccb76cbSHong Zhang   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
24625ccb76cbSHong Zhang 
2463c3339decSBarry Smith    Logically Collective
24645ccb76cbSHong Zhang 
24655ccb76cbSHong Zhang    Input Parameters:
246611a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
24675ccb76cbSHong Zhang .  icntl - index of MUMPS parameter array ICNTL()
24685ccb76cbSHong Zhang -  ival - value of MUMPS ICNTL(icntl)
24695ccb76cbSHong Zhang 
24703c7db156SBarry Smith   Options Database Key:
2471147403d9SBarry Smith .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
24725ccb76cbSHong Zhang 
24735ccb76cbSHong Zhang    Level: beginner
24745ccb76cbSHong Zhang 
247596a0c994SBarry Smith    References:
2476606c0280SSatish Balay .  * - MUMPS Users' Guide
24775ccb76cbSHong Zhang 
24781cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
24795ccb76cbSHong Zhang @*/
2480d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2481d71ae5a4SJacob Faibussowitsch {
24825ccb76cbSHong Zhang   PetscFunctionBegin;
24832989dfd4SHong Zhang   PetscValidType(F, 1);
248428b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
24855ccb76cbSHong Zhang   PetscValidLogicalCollectiveInt(F, icntl, 2);
24865ccb76cbSHong Zhang   PetscValidLogicalCollectiveInt(F, ival, 3);
2487413bcc21SPierre Jolivet   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2488cac4c232SBarry Smith   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
24893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24905ccb76cbSHong Zhang }
24915ccb76cbSHong Zhang 
2492a21f80fcSHong Zhang /*@
2493a21f80fcSHong Zhang   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2494a21f80fcSHong Zhang 
2495c3339decSBarry Smith    Logically Collective
2496a21f80fcSHong Zhang 
2497a21f80fcSHong Zhang    Input Parameters:
249811a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2499a21f80fcSHong Zhang -  icntl - index of MUMPS parameter array ICNTL()
2500a21f80fcSHong Zhang 
2501a21f80fcSHong Zhang   Output Parameter:
2502a21f80fcSHong Zhang .  ival - value of MUMPS ICNTL(icntl)
2503a21f80fcSHong Zhang 
2504a21f80fcSHong Zhang    Level: beginner
2505a21f80fcSHong Zhang 
250696a0c994SBarry Smith    References:
2507606c0280SSatish Balay .  * - MUMPS Users' Guide
2508a21f80fcSHong Zhang 
25091cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2510a21f80fcSHong Zhang @*/
2511d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2512d71ae5a4SJacob Faibussowitsch {
2513bc6112feSHong Zhang   PetscFunctionBegin;
25142989dfd4SHong Zhang   PetscValidType(F, 1);
251528b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2516bc6112feSHong Zhang   PetscValidLogicalCollectiveInt(F, icntl, 2);
2517bc6112feSHong Zhang   PetscValidIntPointer(ival, 3);
2518413bcc21SPierre Jolivet   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2519cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
25203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2521bc6112feSHong Zhang }
2522bc6112feSHong Zhang 
2523d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2524d71ae5a4SJacob Faibussowitsch {
2525e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
25268928b65cSHong Zhang 
25278928b65cSHong Zhang   PetscFunctionBegin;
2528413bcc21SPierre Jolivet   if (mumps->id.job == JOB_NULL) {
2529413bcc21SPierre Jolivet     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
25309371c9d4SSatish Balay     for (i = 0; i < nCNTL_pre; ++i)
25319371c9d4SSatish Balay       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2532413bcc21SPierre Jolivet     if (i == nCNTL_pre) {
2533413bcc21SPierre Jolivet       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2534413bcc21SPierre Jolivet       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2535413bcc21SPierre Jolivet       mumps->CNTL_pre[0]++;
2536413bcc21SPierre Jolivet     }
2537413bcc21SPierre Jolivet     mumps->CNTL_pre[1 + 2 * i] = icntl;
2538413bcc21SPierre Jolivet     mumps->CNTL_pre[2 + 2 * i] = val;
2539413bcc21SPierre Jolivet   } else mumps->id.CNTL(icntl) = val;
25403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25418928b65cSHong Zhang }
25428928b65cSHong Zhang 
2543d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2544d71ae5a4SJacob Faibussowitsch {
2545e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2546bc6112feSHong Zhang 
2547bc6112feSHong Zhang   PetscFunctionBegin;
254836df9881Sjeremy theler   if (mumps->id.job == JOB_NULL) {
254936df9881Sjeremy theler     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
255036df9881Sjeremy theler     *val = 0.0;
255136df9881Sjeremy theler     for (i = 0; i < nCNTL_pre; ++i) {
255236df9881Sjeremy theler       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
255336df9881Sjeremy theler     }
255436df9881Sjeremy theler   } else *val = mumps->id.CNTL(icntl);
25553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2556bc6112feSHong Zhang }
2557bc6112feSHong Zhang 
25588928b65cSHong Zhang /*@
25598928b65cSHong Zhang   MatMumpsSetCntl - Set MUMPS parameter CNTL()
25608928b65cSHong Zhang 
2561c3339decSBarry Smith    Logically Collective
25628928b65cSHong Zhang 
25638928b65cSHong Zhang    Input Parameters:
256411a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
25658928b65cSHong Zhang .  icntl - index of MUMPS parameter array CNTL()
25668928b65cSHong Zhang -  val - value of MUMPS CNTL(icntl)
25678928b65cSHong Zhang 
25683c7db156SBarry Smith   Options Database Key:
2569147403d9SBarry Smith .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to ival
25708928b65cSHong Zhang 
25718928b65cSHong Zhang    Level: beginner
25728928b65cSHong Zhang 
257396a0c994SBarry Smith    References:
2574606c0280SSatish Balay .  * - MUMPS Users' Guide
25758928b65cSHong Zhang 
25761cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
25778928b65cSHong Zhang @*/
2578d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2579d71ae5a4SJacob Faibussowitsch {
25808928b65cSHong Zhang   PetscFunctionBegin;
25812989dfd4SHong Zhang   PetscValidType(F, 1);
258228b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
25838928b65cSHong Zhang   PetscValidLogicalCollectiveInt(F, icntl, 2);
2584bc6112feSHong Zhang   PetscValidLogicalCollectiveReal(F, val, 3);
2585413bcc21SPierre Jolivet   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2586cac4c232SBarry Smith   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
25873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25888928b65cSHong Zhang }
25898928b65cSHong Zhang 
2590a21f80fcSHong Zhang /*@
2591a21f80fcSHong Zhang   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2592a21f80fcSHong Zhang 
2593c3339decSBarry Smith    Logically Collective
2594a21f80fcSHong Zhang 
2595a21f80fcSHong Zhang    Input Parameters:
259611a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2597a21f80fcSHong Zhang -  icntl - index of MUMPS parameter array CNTL()
2598a21f80fcSHong Zhang 
2599a21f80fcSHong Zhang   Output Parameter:
2600a21f80fcSHong Zhang .  val - value of MUMPS CNTL(icntl)
2601a21f80fcSHong Zhang 
2602a21f80fcSHong Zhang    Level: beginner
2603a21f80fcSHong Zhang 
260496a0c994SBarry Smith    References:
2605606c0280SSatish Balay .  * - MUMPS Users' Guide
2606a21f80fcSHong Zhang 
26071cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2608a21f80fcSHong Zhang @*/
2609d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2610d71ae5a4SJacob Faibussowitsch {
2611bc6112feSHong Zhang   PetscFunctionBegin;
26122989dfd4SHong Zhang   PetscValidType(F, 1);
261328b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2614bc6112feSHong Zhang   PetscValidLogicalCollectiveInt(F, icntl, 2);
2615bc6112feSHong Zhang   PetscValidRealPointer(val, 3);
2616413bcc21SPierre Jolivet   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2617cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
26183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2619bc6112feSHong Zhang }
2620bc6112feSHong Zhang 
2621d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2622d71ae5a4SJacob Faibussowitsch {
2623e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2624bc6112feSHong Zhang 
2625bc6112feSHong Zhang   PetscFunctionBegin;
2626bc6112feSHong Zhang   *info = mumps->id.INFO(icntl);
26273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2628bc6112feSHong Zhang }
2629bc6112feSHong Zhang 
2630d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2631d71ae5a4SJacob Faibussowitsch {
2632e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2633bc6112feSHong Zhang 
2634bc6112feSHong Zhang   PetscFunctionBegin;
2635bc6112feSHong Zhang   *infog = mumps->id.INFOG(icntl);
26363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2637bc6112feSHong Zhang }
2638bc6112feSHong Zhang 
2639d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2640d71ae5a4SJacob Faibussowitsch {
2641e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2642bc6112feSHong Zhang 
2643bc6112feSHong Zhang   PetscFunctionBegin;
2644bc6112feSHong Zhang   *rinfo = mumps->id.RINFO(icntl);
26453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2646bc6112feSHong Zhang }
2647bc6112feSHong Zhang 
2648d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2649d71ae5a4SJacob Faibussowitsch {
2650e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2651bc6112feSHong Zhang 
2652bc6112feSHong Zhang   PetscFunctionBegin;
2653bc6112feSHong Zhang   *rinfog = mumps->id.RINFOG(icntl);
26543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2655bc6112feSHong Zhang }
2656bc6112feSHong Zhang 
26575c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
26585c0bae8cSAshish Patel {
26595c0bae8cSAshish Patel   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
26605c0bae8cSAshish Patel 
26615c0bae8cSAshish Patel   PetscFunctionBegin;
26625c0bae8cSAshish Patel   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
26635c0bae8cSAshish Patel   *size  = 0;
26645c0bae8cSAshish Patel   *array = NULL;
26655c0bae8cSAshish Patel   if (!mumps->myid) {
26665c0bae8cSAshish Patel     *size = mumps->id.INFOG(28);
26675c0bae8cSAshish Patel     PetscCall(PetscMalloc1(*size, array));
26685c0bae8cSAshish Patel     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
26695c0bae8cSAshish Patel   }
26705c0bae8cSAshish Patel   PetscFunctionReturn(PETSC_SUCCESS);
26715c0bae8cSAshish Patel }
26725c0bae8cSAshish Patel 
2673d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2674d71ae5a4SJacob Faibussowitsch {
26750e6b8875SHong Zhang   Mat          Bt = NULL, Btseq = NULL;
26760e6b8875SHong Zhang   PetscBool    flg;
2677bb599dfdSHong Zhang   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2678bb599dfdSHong Zhang   PetscScalar *aa;
2679f410b75aSHong Zhang   PetscInt     spnr, *ia, *ja, M, nrhs;
2680bb599dfdSHong Zhang 
2681bb599dfdSHong Zhang   PetscFunctionBegin;
2682064a246eSJacob Faibussowitsch   PetscValidPointer(spRHS, 2);
2683013e2dc7SBarry Smith   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
26840e6b8875SHong Zhang   if (flg) {
26859566063dSJacob Faibussowitsch     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2686013e2dc7SBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
2687bb599dfdSHong Zhang 
26889566063dSJacob Faibussowitsch   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2689bb599dfdSHong Zhang 
26902d4298aeSJunchao Zhang   if (mumps->petsc_size > 1) {
26910e6b8875SHong Zhang     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
26920e6b8875SHong Zhang     Btseq         = b->A;
26930e6b8875SHong Zhang   } else {
26940e6b8875SHong Zhang     Btseq = Bt;
26950e6b8875SHong Zhang   }
26960e6b8875SHong Zhang 
26979566063dSJacob Faibussowitsch   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2698f410b75aSHong Zhang   mumps->id.nrhs = nrhs;
2699f410b75aSHong Zhang   mumps->id.lrhs = M;
2700f410b75aSHong Zhang   mumps->id.rhs  = NULL;
2701f410b75aSHong Zhang 
2702e3f2db6aSHong Zhang   if (!mumps->myid) {
27039566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
27049566063dSJacob Faibussowitsch     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
270528b400f6SJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
27069566063dSJacob Faibussowitsch     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2707bb599dfdSHong Zhang     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2708e3f2db6aSHong Zhang   } else {
2709e3f2db6aSHong Zhang     mumps->id.irhs_ptr    = NULL;
2710e3f2db6aSHong Zhang     mumps->id.irhs_sparse = NULL;
2711e3f2db6aSHong Zhang     mumps->id.nz_rhs      = 0;
2712e3f2db6aSHong Zhang     mumps->id.rhs_sparse  = NULL;
2713e3f2db6aSHong Zhang   }
2714bb599dfdSHong Zhang   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2715e3f2db6aSHong Zhang   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2716bb599dfdSHong Zhang 
2717bb599dfdSHong Zhang   /* solve phase */
2718bb599dfdSHong Zhang   mumps->id.job = JOB_SOLVE;
27193ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
2720049d1499SBarry Smith   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
272114267174SHong Zhang 
2722e3f2db6aSHong Zhang   if (!mumps->myid) {
27239566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
27249566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
272528b400f6SJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2726e3f2db6aSHong Zhang   }
27273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2728bb599dfdSHong Zhang }
2729bb599dfdSHong Zhang 
2730bb599dfdSHong Zhang /*@
27312ef1f0ffSBarry Smith   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
2732bb599dfdSHong Zhang 
2733c3339decSBarry Smith    Logically Collective
2734bb599dfdSHong Zhang 
273520f4b53cSBarry Smith    Input Parameter:
273620f4b53cSBarry Smith .  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2737bb599dfdSHong Zhang 
2738bb599dfdSHong Zhang   Output Parameter:
273920f4b53cSBarry Smith . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
2740bb599dfdSHong Zhang 
2741bb599dfdSHong Zhang    Level: beginner
2742bb599dfdSHong Zhang 
2743bb599dfdSHong Zhang    References:
2744606c0280SSatish Balay .  * - MUMPS Users' Guide
2745bb599dfdSHong Zhang 
27461cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
2747bb599dfdSHong Zhang @*/
2748d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2749d71ae5a4SJacob Faibussowitsch {
2750bb599dfdSHong Zhang   PetscFunctionBegin;
2751bb599dfdSHong Zhang   PetscValidType(F, 1);
275228b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2753cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
27543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2755bb599dfdSHong Zhang }
2756bb599dfdSHong Zhang 
2757d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2758d71ae5a4SJacob Faibussowitsch {
27590e6b8875SHong Zhang   Mat spRHS;
27600e6b8875SHong Zhang 
27610e6b8875SHong Zhang   PetscFunctionBegin;
27629566063dSJacob Faibussowitsch   PetscCall(MatCreateTranspose(spRHST, &spRHS));
27639566063dSJacob Faibussowitsch   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
27649566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&spRHS));
27653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27660e6b8875SHong Zhang }
27670e6b8875SHong Zhang 
27680e6b8875SHong Zhang /*@
27692ef1f0ffSBarry Smith   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
27700e6b8875SHong Zhang 
2771c3339decSBarry Smith    Logically Collective
27720e6b8875SHong Zhang 
277320f4b53cSBarry Smith    Input Parameter:
277420f4b53cSBarry Smith .  F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
27750e6b8875SHong Zhang 
27760e6b8875SHong Zhang   Output Parameter:
277720f4b53cSBarry Smith . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
27780e6b8875SHong Zhang 
27790e6b8875SHong Zhang    Level: beginner
27800e6b8875SHong Zhang 
27810e6b8875SHong Zhang    References:
2782606c0280SSatish Balay .  * - MUMPS Users' Guide
27830e6b8875SHong Zhang 
27841cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
27850e6b8875SHong Zhang @*/
2786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2787d71ae5a4SJacob Faibussowitsch {
27880e6b8875SHong Zhang   PetscBool flg;
27890e6b8875SHong Zhang 
27900e6b8875SHong Zhang   PetscFunctionBegin;
27910e6b8875SHong Zhang   PetscValidType(F, 1);
279228b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
27939566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
279428b400f6SJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
27950e6b8875SHong Zhang 
2796cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
27973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27980e6b8875SHong Zhang }
27990e6b8875SHong Zhang 
2800a21f80fcSHong Zhang /*@
2801a21f80fcSHong Zhang   MatMumpsGetInfo - Get MUMPS parameter INFO()
2802a21f80fcSHong Zhang 
2803c3339decSBarry Smith    Logically Collective
2804a21f80fcSHong Zhang 
2805a21f80fcSHong Zhang    Input Parameters:
280611a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2807a21f80fcSHong Zhang -  icntl - index of MUMPS parameter array INFO()
2808a21f80fcSHong Zhang 
2809a21f80fcSHong Zhang   Output Parameter:
2810a21f80fcSHong Zhang .  ival - value of MUMPS INFO(icntl)
2811a21f80fcSHong Zhang 
2812a21f80fcSHong Zhang    Level: beginner
2813a21f80fcSHong Zhang 
281496a0c994SBarry Smith    References:
2815606c0280SSatish Balay .  * - MUMPS Users' Guide
2816a21f80fcSHong Zhang 
28171cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2818a21f80fcSHong Zhang @*/
2819d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2820d71ae5a4SJacob Faibussowitsch {
2821bc6112feSHong Zhang   PetscFunctionBegin;
28222989dfd4SHong Zhang   PetscValidType(F, 1);
282328b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2824ca810319SHong Zhang   PetscValidIntPointer(ival, 3);
2825cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
28263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2827bc6112feSHong Zhang }
2828bc6112feSHong Zhang 
2829a21f80fcSHong Zhang /*@
2830a21f80fcSHong Zhang   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2831a21f80fcSHong Zhang 
2832c3339decSBarry Smith    Logically Collective
2833a21f80fcSHong Zhang 
2834a21f80fcSHong Zhang    Input Parameters:
283511a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2836a21f80fcSHong Zhang -  icntl - index of MUMPS parameter array INFOG()
2837a21f80fcSHong Zhang 
2838a21f80fcSHong Zhang   Output Parameter:
2839a21f80fcSHong Zhang .  ival - value of MUMPS INFOG(icntl)
2840a21f80fcSHong Zhang 
2841a21f80fcSHong Zhang    Level: beginner
2842a21f80fcSHong Zhang 
284396a0c994SBarry Smith    References:
2844606c0280SSatish Balay .  * - MUMPS Users' Guide
2845a21f80fcSHong Zhang 
28461cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2847a21f80fcSHong Zhang @*/
2848d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
2849d71ae5a4SJacob Faibussowitsch {
2850bc6112feSHong Zhang   PetscFunctionBegin;
28512989dfd4SHong Zhang   PetscValidType(F, 1);
285228b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2853ca810319SHong Zhang   PetscValidIntPointer(ival, 3);
2854cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
28553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2856bc6112feSHong Zhang }
2857bc6112feSHong Zhang 
2858a21f80fcSHong Zhang /*@
2859a21f80fcSHong Zhang   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2860a21f80fcSHong Zhang 
2861c3339decSBarry Smith    Logically Collective
2862a21f80fcSHong Zhang 
2863a21f80fcSHong Zhang    Input Parameters:
286411a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2865a21f80fcSHong Zhang -  icntl - index of MUMPS parameter array RINFO()
2866a21f80fcSHong Zhang 
2867a21f80fcSHong Zhang   Output Parameter:
2868a21f80fcSHong Zhang .  val - value of MUMPS RINFO(icntl)
2869a21f80fcSHong Zhang 
2870a21f80fcSHong Zhang    Level: beginner
2871a21f80fcSHong Zhang 
287296a0c994SBarry Smith    References:
2873606c0280SSatish Balay .  * - MUMPS Users' Guide
2874a21f80fcSHong Zhang 
28751cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2876a21f80fcSHong Zhang @*/
2877d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
2878d71ae5a4SJacob Faibussowitsch {
2879bc6112feSHong Zhang   PetscFunctionBegin;
28802989dfd4SHong Zhang   PetscValidType(F, 1);
288128b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2882bc6112feSHong Zhang   PetscValidRealPointer(val, 3);
2883cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
28843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2885bc6112feSHong Zhang }
2886bc6112feSHong Zhang 
2887a21f80fcSHong Zhang /*@
2888a21f80fcSHong Zhang   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2889a21f80fcSHong Zhang 
2890c3339decSBarry Smith    Logically Collective
2891a21f80fcSHong Zhang 
2892a21f80fcSHong Zhang    Input Parameters:
289311a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2894a21f80fcSHong Zhang -  icntl - index of MUMPS parameter array RINFOG()
2895a21f80fcSHong Zhang 
2896a21f80fcSHong Zhang   Output Parameter:
2897a21f80fcSHong Zhang .  val - value of MUMPS RINFOG(icntl)
2898a21f80fcSHong Zhang 
2899a21f80fcSHong Zhang    Level: beginner
2900a21f80fcSHong Zhang 
290196a0c994SBarry Smith    References:
2902606c0280SSatish Balay .  * - MUMPS Users' Guide
2903a21f80fcSHong Zhang 
29041cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2905a21f80fcSHong Zhang @*/
2906d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
2907d71ae5a4SJacob Faibussowitsch {
2908bc6112feSHong Zhang   PetscFunctionBegin;
29092989dfd4SHong Zhang   PetscValidType(F, 1);
291028b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2911bc6112feSHong Zhang   PetscValidRealPointer(val, 3);
2912cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
29133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2914bc6112feSHong Zhang }
2915bc6112feSHong Zhang 
29165c0bae8cSAshish Patel /*@
29175c0bae8cSAshish Patel   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()
29185c0bae8cSAshish Patel 
29195c0bae8cSAshish Patel    Logically Collective
29205c0bae8cSAshish Patel 
29215c0bae8cSAshish Patel    Input Parameter:
29225c0bae8cSAshish Patel .  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
29235c0bae8cSAshish Patel 
29245c0bae8cSAshish Patel   Output Parameters:
29255c0bae8cSAshish Patel +  size - local size of the array. The size of the array is non-zero only on the host.
29265c0bae8cSAshish Patel -  array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
29275c0bae8cSAshish Patel            for freeing this array.
29285c0bae8cSAshish Patel 
29295c0bae8cSAshish Patel    Level: beginner
29305c0bae8cSAshish Patel 
29315c0bae8cSAshish Patel    References:
29325c0bae8cSAshish Patel .  * - MUMPS Users' Guide
29335c0bae8cSAshish Patel 
29341cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
29355c0bae8cSAshish Patel @*/
29365c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
29375c0bae8cSAshish Patel {
29385c0bae8cSAshish Patel   PetscFunctionBegin;
29395c0bae8cSAshish Patel   PetscValidType(F, 1);
29405c0bae8cSAshish Patel   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
29415c0bae8cSAshish Patel   PetscValidIntPointer(size, 3);
29425c0bae8cSAshish Patel   PetscValidPointer(array, 4);
29435c0bae8cSAshish Patel   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
29445c0bae8cSAshish Patel   PetscFunctionReturn(PETSC_SUCCESS);
29455c0bae8cSAshish Patel }
29465c0bae8cSAshish Patel 
294724b6179bSKris Buschelman /*MC
29482692d6eeSBarry Smith   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
294924b6179bSKris Buschelman   distributed and sequential matrices via the external package MUMPS.
295024b6179bSKris Buschelman 
295111a5261eSBarry Smith   Works with `MATAIJ` and `MATSBAIJ` matrices
295224b6179bSKris Buschelman 
2953c2b89b5dSBarry Smith   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2954c2b89b5dSBarry Smith 
29552ef1f0ffSBarry Smith   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
29562ef1f0ffSBarry Smith   See details below.
2957217d3b1eSJunchao Zhang 
29582ef1f0ffSBarry Smith   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
2959c2b89b5dSBarry Smith 
296024b6179bSKris Buschelman   Options Database Keys:
29614422a9fcSPatrick Sanan +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
29624422a9fcSPatrick Sanan .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
29634422a9fcSPatrick Sanan .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
29644422a9fcSPatrick Sanan .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
29654422a9fcSPatrick Sanan .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2966b53c1a7fSBarry Smith .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2967b53c1a7fSBarry Smith                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
29684422a9fcSPatrick Sanan .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
29694422a9fcSPatrick Sanan .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
29704422a9fcSPatrick Sanan .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
29714422a9fcSPatrick Sanan .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
29724422a9fcSPatrick Sanan .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
29734422a9fcSPatrick Sanan .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
297445e3843bSPierre Jolivet .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
29754422a9fcSPatrick Sanan .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
297625aac85cSJunchao Zhang .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
29774422a9fcSPatrick Sanan .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
29784422a9fcSPatrick Sanan .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
29794422a9fcSPatrick Sanan .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
29804422a9fcSPatrick Sanan .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
29814422a9fcSPatrick Sanan .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
29834422a9fcSPatrick Sanan .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
29844422a9fcSPatrick Sanan .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
29854422a9fcSPatrick Sanan .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
29864422a9fcSPatrick Sanan .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2987a0e18203SThibaut Appel .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2988a0e18203SThibaut Appel .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2989a0e18203SThibaut Appel .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
29904422a9fcSPatrick Sanan .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
29914422a9fcSPatrick Sanan .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
29924422a9fcSPatrick Sanan .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
29934422a9fcSPatrick Sanan .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2994217d3b1eSJunchao Zhang .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2995a0e18203SThibaut Appel .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2996217d3b1eSJunchao Zhang -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2997217d3b1eSJunchao Zhang                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
299824b6179bSKris Buschelman 
299924b6179bSKris Buschelman   Level: beginner
300024b6179bSKris Buschelman 
300195452b02SPatrick Sanan     Notes:
30022ef1f0ffSBarry Smith     MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
30032ef1f0ffSBarry Smith     error if the matrix is Hermitian.
300438548759SBarry Smith 
300526cc229bSBarry Smith     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
300626cc229bSBarry Smith     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
300726cc229bSBarry Smith 
30082ef1f0ffSBarry Smith     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
30092ef1f0ffSBarry Smith     the failure with
30102ef1f0ffSBarry Smith .vb
30112ef1f0ffSBarry Smith           KSPGetPC(ksp,&pc);
30122ef1f0ffSBarry Smith           PCFactorGetMatrix(pc,&mat);
30132ef1f0ffSBarry Smith           MatMumpsGetInfo(mat,....);
30142ef1f0ffSBarry Smith           MatMumpsGetInfog(mat,....); etc.
30152ef1f0ffSBarry Smith .ve
30162ef1f0ffSBarry Smith     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
30179fc87aa7SBarry Smith 
3018a5399872SJunchao Zhang     MUMPS provides 64-bit integer support in two build modes:
3019a5399872SJunchao Zhang       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3020a5399872SJunchao Zhang       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
30218fcaa860SBarry Smith 
3022a5399872SJunchao Zhang       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3023a5399872SJunchao Zhang       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3024a5399872SJunchao Zhang       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3025a5399872SJunchao Zhang       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3026a5399872SJunchao Zhang 
    With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3028a5399872SJunchao Zhang 
3029a5399872SJunchao Zhang   Two modes to run MUMPS/PETSc with OpenMP
30302ef1f0ffSBarry Smith .vb
30312ef1f0ffSBarry Smith      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
30322ef1f0ffSBarry Smith      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
30332ef1f0ffSBarry Smith .ve
30348fcaa860SBarry Smith 
30352ef1f0ffSBarry Smith .vb
30362ef1f0ffSBarry Smith      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
30372ef1f0ffSBarry Smith     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
30382ef1f0ffSBarry Smith .ve
30398fcaa860SBarry Smith 
30408fcaa860SBarry Smith    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
30412ef1f0ffSBarry Smith    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
30422ef1f0ffSBarry Smith    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
30438fcaa860SBarry Smith    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
3045217d3b1eSJunchao Zhang 
30468fcaa860SBarry Smith    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3047217d3b1eSJunchao Zhang    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3048217d3b1eSJunchao Zhang    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3049217d3b1eSJunchao Zhang    are called slave ranks (or slaves). Only master ranks are seen by MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
3050217d3b1eSJunchao Zhang    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3051217d3b1eSJunchao Zhang    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3052217d3b1eSJunchao Zhang    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3053217d3b1eSJunchao Zhang    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3054217d3b1eSJunchao Zhang    cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3055217d3b1eSJunchao Zhang    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
30568fcaa860SBarry Smith    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3057217d3b1eSJunchao Zhang    examine the mapping result.
3058217d3b1eSJunchao Zhang 
305911a5261eSBarry Smith    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
306011a5261eSBarry Smith    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
306111a5261eSBarry Smith    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3062217d3b1eSJunchao Zhang 
3063217d3b1eSJunchao Zhang    References:
3064606c0280SSatish Balay +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3065606c0280SSatish Balay -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3066217d3b1eSJunchao Zhang 
30671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
306824b6179bSKris Buschelman M*/
306924b6179bSKris Buschelman 
3070d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3071d71ae5a4SJacob Faibussowitsch {
307235bd34faSBarry Smith   PetscFunctionBegin;
30732692d6eeSBarry Smith   *type = MATSOLVERMUMPS;
30743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
307535bd34faSBarry Smith }
307635bd34faSBarry Smith 
3077bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */
3078d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3079d71ae5a4SJacob Faibussowitsch {
30802877fffaSHong Zhang   Mat         B;
30812877fffaSHong Zhang   Mat_MUMPS  *mumps;
3082ace3abfcSBarry Smith   PetscBool   isSeqAIJ;
30832c7c0729SBarry Smith   PetscMPIInt size;
30842877fffaSHong Zhang 
30852877fffaSHong Zhang   PetscFunctionBegin;
3086eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
3087b94d7dedSBarry Smith   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3088eb1ec7c1SStefano Zampini #endif
30892877fffaSHong Zhang   /* Create the factorization matrix */
30909566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
30919566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
30929566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
30939566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
30949566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
30952877fffaSHong Zhang 
30964dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mumps));
30972205254eSKarl Rupp 
30982877fffaSHong Zhang   B->ops->view    = MatView_MUMPS;
309935bd34faSBarry Smith   B->ops->getinfo = MatGetInfo_MUMPS;
31002205254eSKarl Rupp 
31019566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
31029566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
31039566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
31049566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
31059566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
31069566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
31079566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
31089566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
31099566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
31109566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
31119566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
31125c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
31139566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
31149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
31156444a565SStefano Zampini 
3116450b117fSShri Abhyankar   if (ftype == MAT_FACTOR_LU) {
3117450b117fSShri Abhyankar     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3118d5f3da31SBarry Smith     B->factortype            = MAT_FACTOR_LU;
3119bccb9932SShri Abhyankar     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3120bccb9932SShri Abhyankar     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
31219566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3122746480a1SHong Zhang     mumps->sym = 0;
3123dcd589f8SShri Abhyankar   } else {
312467877ebaSShri Abhyankar     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3125450b117fSShri Abhyankar     B->factortype                  = MAT_FACTOR_CHOLESKY;
3126bccb9932SShri Abhyankar     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3127bccb9932SShri Abhyankar     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
31289566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
312959ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX)
313059ac8732SStefano Zampini     mumps->sym = 2;
313159ac8732SStefano Zampini #else
3132b94d7dedSBarry Smith     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
31336fdc2a6dSBarry Smith     else mumps->sym = 2;
313459ac8732SStefano Zampini #endif
3135450b117fSShri Abhyankar   }
31362877fffaSHong Zhang 
313700c67f3bSHong Zhang   /* set solvertype */
31389566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
31399566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
31409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
31412c7c0729SBarry Smith   if (size == 1) {
31424ac6704cSBarry Smith     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3143f73b0415SBarry Smith     B->canuseordering = PETSC_TRUE;
31442c7c0729SBarry Smith   }
31452877fffaSHong Zhang   B->ops->destroy = MatDestroy_MUMPS;
3146e69c285eSBarry Smith   B->data         = (void *)mumps;
31472205254eSKarl Rupp 
31482877fffaSHong Zhang   *F               = B;
3149413bcc21SPierre Jolivet   mumps->id.job    = JOB_NULL;
3150413bcc21SPierre Jolivet   mumps->ICNTL_pre = NULL;
3151413bcc21SPierre Jolivet   mumps->CNTL_pre  = NULL;
3152d47f36abSHong Zhang   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
31533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31542877fffaSHong Zhang }
31552877fffaSHong Zhang 
3156bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */
3157d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3158d71ae5a4SJacob Faibussowitsch {
31592877fffaSHong Zhang   Mat         B;
31602877fffaSHong Zhang   Mat_MUMPS  *mumps;
3161ace3abfcSBarry Smith   PetscBool   isSeqSBAIJ;
31622c7c0729SBarry Smith   PetscMPIInt size;
31632877fffaSHong Zhang 
31642877fffaSHong Zhang   PetscFunctionBegin;
3165eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
3166b94d7dedSBarry Smith   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3167eb1ec7c1SStefano Zampini #endif
31689566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
31699566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
31709566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
31719566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
3172e69c285eSBarry Smith 
31734dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mumps));
31749566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3175bccb9932SShri Abhyankar   if (isSeqSBAIJ) {
317616ebf90aSShri Abhyankar     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3177dcd589f8SShri Abhyankar   } else {
3178bccb9932SShri Abhyankar     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3179bccb9932SShri Abhyankar   }
3180bccb9932SShri Abhyankar 
318167877ebaSShri Abhyankar   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3182bccb9932SShri Abhyankar   B->ops->view                   = MatView_MUMPS;
3183722b6324SPierre Jolivet   B->ops->getinfo                = MatGetInfo_MUMPS;
31842205254eSKarl Rupp 
31859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
31869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
31879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
31889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
31899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
31909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
31919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
31929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
31939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
31949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
31959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
31965c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
31979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
31989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
31992205254eSKarl Rupp 
3200f4762488SHong Zhang   B->factortype = MAT_FACTOR_CHOLESKY;
320159ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX)
320259ac8732SStefano Zampini   mumps->sym = 2;
320359ac8732SStefano Zampini #else
3204b94d7dedSBarry Smith   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
32056fdc2a6dSBarry Smith   else mumps->sym = 2;
320659ac8732SStefano Zampini #endif
3207a214ac2aSShri Abhyankar 
320800c67f3bSHong Zhang   /* set solvertype */
32099566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
32109566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
32119566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
32122c7c0729SBarry Smith   if (size == 1) {
32134ac6704cSBarry Smith     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3214f73b0415SBarry Smith     B->canuseordering = PETSC_TRUE;
32152c7c0729SBarry Smith   }
32169566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3217f3c0ef26SHong Zhang   B->ops->destroy = MatDestroy_MUMPS;
3218e69c285eSBarry Smith   B->data         = (void *)mumps;
32192205254eSKarl Rupp 
32202877fffaSHong Zhang   *F               = B;
3221413bcc21SPierre Jolivet   mumps->id.job    = JOB_NULL;
3222413bcc21SPierre Jolivet   mumps->ICNTL_pre = NULL;
3223413bcc21SPierre Jolivet   mumps->CNTL_pre  = NULL;
3224d47f36abSHong Zhang   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
32253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
32262877fffaSHong Zhang }
322797969023SHong Zhang 
3228d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3229d71ae5a4SJacob Faibussowitsch {
323067877ebaSShri Abhyankar   Mat         B;
323167877ebaSShri Abhyankar   Mat_MUMPS  *mumps;
3232ace3abfcSBarry Smith   PetscBool   isSeqBAIJ;
32332c7c0729SBarry Smith   PetscMPIInt size;
323467877ebaSShri Abhyankar 
323567877ebaSShri Abhyankar   PetscFunctionBegin;
323667877ebaSShri Abhyankar   /* Create the factorization matrix */
32379566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
32389566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
32399566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
32409566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
32419566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
3242450b117fSShri Abhyankar 
32434dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mumps));
3244450b117fSShri Abhyankar   if (ftype == MAT_FACTOR_LU) {
3245450b117fSShri Abhyankar     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3246450b117fSShri Abhyankar     B->factortype            = MAT_FACTOR_LU;
3247bccb9932SShri Abhyankar     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3248bccb9932SShri Abhyankar     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3249746480a1SHong Zhang     mumps->sym = 0;
32509566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3251546078acSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3252bccb9932SShri Abhyankar 
3253450b117fSShri Abhyankar   B->ops->view    = MatView_MUMPS;
3254722b6324SPierre Jolivet   B->ops->getinfo = MatGetInfo_MUMPS;
32552205254eSKarl Rupp 
32569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
32579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
32589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
32599566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
32609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
32619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
32629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
32639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
32649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
32659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
32669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
32675c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
32689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
32699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3270450b117fSShri Abhyankar 
327100c67f3bSHong Zhang   /* set solvertype */
32729566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
32739566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
32749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
32752c7c0729SBarry Smith   if (size == 1) {
32764ac6704cSBarry Smith     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3277f73b0415SBarry Smith     B->canuseordering = PETSC_TRUE;
32782c7c0729SBarry Smith   }
32797ee00b23SStefano Zampini   B->ops->destroy = MatDestroy_MUMPS;
32807ee00b23SStefano Zampini   B->data         = (void *)mumps;
32817ee00b23SStefano Zampini 
32827ee00b23SStefano Zampini   *F               = B;
3283413bcc21SPierre Jolivet   mumps->id.job    = JOB_NULL;
3284413bcc21SPierre Jolivet   mumps->ICNTL_pre = NULL;
3285413bcc21SPierre Jolivet   mumps->CNTL_pre  = NULL;
3286d47f36abSHong Zhang   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
32873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
32887ee00b23SStefano Zampini }
32897ee00b23SStefano Zampini 
32907ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */
3291d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3292d71ae5a4SJacob Faibussowitsch {
32937ee00b23SStefano Zampini   Mat         B;
32947ee00b23SStefano Zampini   Mat_MUMPS  *mumps;
32957ee00b23SStefano Zampini   PetscBool   isSeqSELL;
32962c7c0729SBarry Smith   PetscMPIInt size;
32977ee00b23SStefano Zampini 
32987ee00b23SStefano Zampini   PetscFunctionBegin;
32997ee00b23SStefano Zampini   /* Create the factorization matrix */
33009566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
33019566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
33029566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
33039566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
33049566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
33057ee00b23SStefano Zampini 
33064dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mumps));
33077ee00b23SStefano Zampini 
33087ee00b23SStefano Zampini   B->ops->view    = MatView_MUMPS;
33097ee00b23SStefano Zampini   B->ops->getinfo = MatGetInfo_MUMPS;
33107ee00b23SStefano Zampini 
33119566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
33129566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
33139566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
33149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
33159566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
33169566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
33179566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
33189566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
33199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
33209566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
33219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
33225c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
33237ee00b23SStefano Zampini 
33247ee00b23SStefano Zampini   if (ftype == MAT_FACTOR_LU) {
33257ee00b23SStefano Zampini     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
33267ee00b23SStefano Zampini     B->factortype            = MAT_FACTOR_LU;
33277ee00b23SStefano Zampini     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
33287ee00b23SStefano Zampini     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
33297ee00b23SStefano Zampini     mumps->sym = 0;
33309566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
33317ee00b23SStefano Zampini   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
33327ee00b23SStefano Zampini 
33337ee00b23SStefano Zampini   /* set solvertype */
33349566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
33359566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
33369566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
33372c7c0729SBarry Smith   if (size == 1) {
33384ac6704cSBarry Smith     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3339f73b0415SBarry Smith     B->canuseordering = PETSC_TRUE;
33402c7c0729SBarry Smith   }
3341450b117fSShri Abhyankar   B->ops->destroy = MatDestroy_MUMPS;
3342e69c285eSBarry Smith   B->data         = (void *)mumps;
33432205254eSKarl Rupp 
3344450b117fSShri Abhyankar   *F               = B;
3345413bcc21SPierre Jolivet   mumps->id.job    = JOB_NULL;
3346413bcc21SPierre Jolivet   mumps->ICNTL_pre = NULL;
3347413bcc21SPierre Jolivet   mumps->CNTL_pre  = NULL;
3348d47f36abSHong Zhang   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
33493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3350450b117fSShri Abhyankar }
335142c9c57cSBarry Smith 
3352d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3353d71ae5a4SJacob Faibussowitsch {
335442c9c57cSBarry Smith   PetscFunctionBegin;
33559566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
33569566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
33579566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
33589566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
33599566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
33609566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
33619566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
33629566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
33639566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
33649566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
33659566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
33663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
336742c9c57cSBarry Smith }
3368