xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision e91c04dfc8a52dee1965211bb1cc8e5bf775178f)
1 /*
2         Provides an interface to the SuperLU_DIST sparse solver
3 */
4 
5 #include <../src/mat/impls/aij/seq/aij.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 #include <petscpkg_version.h>
8 
9 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef")
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12   #define CASTDOUBLECOMPLEX     (doublecomplex *)
13   #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
14   #include <superlu_zdefs.h>
15   #define LUstructInit                  zLUstructInit
16   #define ScalePermstructInit           zScalePermstructInit
17   #define ScalePermstructFree           zScalePermstructFree
18   #define LUstructFree                  zLUstructFree
19   #define Destroy_LU                    zDestroy_LU
20   #define ScalePermstruct_t             zScalePermstruct_t
21   #define LUstruct_t                    zLUstruct_t
22   #define SOLVEstruct_t                 zSOLVEstruct_t
23   #define SolveFinalize                 zSolveFinalize
24   #define pGetDiagU                     pzGetDiagU
25   #define pgssvx                        pzgssvx
26   #define allocateA_dist                zallocateA_dist
27   #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
28   #define SLU                           SLU_Z
29   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
30     #define DeAllocLlu_3d              zDeAllocLlu_3d
31     #define DeAllocGlu_3d              zDeAllocGlu_3d
32     #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
33     #define pgssvx3d                   pzgssvx3d
34   #endif
35 #elif defined(PETSC_USE_REAL_SINGLE)
36   #define CASTDOUBLECOMPLEX
37   #define CASTDOUBLECOMPLEXSTAR
38   #include <superlu_sdefs.h>
39   #define LUstructInit                  sLUstructInit
40   #define ScalePermstructInit           sScalePermstructInit
41   #define ScalePermstructFree           sScalePermstructFree
42   #define LUstructFree                  sLUstructFree
43   #define Destroy_LU                    sDestroy_LU
44   #define ScalePermstruct_t             sScalePermstruct_t
45   #define LUstruct_t                    sLUstruct_t
46   #define SOLVEstruct_t                 sSOLVEstruct_t
47   #define SolveFinalize                 sSolveFinalize
48   #define pGetDiagU                     psGetDiagU
49   #define pgssvx                        psgssvx
50   #define allocateA_dist                sallocateA_dist
51   #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
52   #define SLU                           SLU_S
53   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
54     #define DeAllocLlu_3d              sDeAllocLlu_3d
55     #define DeAllocGlu_3d              sDeAllocGlu_3d
56     #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
57     #define pgssvx3d                   psgssvx3d
58   #endif
59 #else
60   #define CASTDOUBLECOMPLEX
61   #define CASTDOUBLECOMPLEXSTAR
62   #include <superlu_ddefs.h>
63   #define LUstructInit                  dLUstructInit
64   #define ScalePermstructInit           dScalePermstructInit
65   #define ScalePermstructFree           dScalePermstructFree
66   #define LUstructFree                  dLUstructFree
67   #define Destroy_LU                    dDestroy_LU
68   #define ScalePermstruct_t             dScalePermstruct_t
69   #define LUstruct_t                    dLUstruct_t
70   #define SOLVEstruct_t                 dSOLVEstruct_t
71   #define SolveFinalize                 dSolveFinalize
72   #define pGetDiagU                     pdGetDiagU
73   #define pgssvx                        pdgssvx
74   #define allocateA_dist                dallocateA_dist
75   #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
76   #define SLU                           SLU_D
77   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
78     #define DeAllocLlu_3d              dDeAllocLlu_3d
79     #define DeAllocGlu_3d              dDeAllocGlu_3d
80     #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
81     #define pgssvx3d                   pdgssvx3d
82   #endif
83 #endif
84 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
85   #include <superlu_sdefs.h>
86 #endif
87 EXTERN_C_END
88 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()
89 
90 typedef struct {
91   int_t      nprow, npcol, *row, *col;
92   gridinfo_t grid;
93 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
94   PetscBool    use3d;
95   int_t        npdep; /* replication factor, must be power of two */
96   gridinfo3d_t grid3d;
97 #endif
98   superlu_dist_options_t options;
99   SuperMatrix            A_sup;
100   ScalePermstruct_t      ScalePermstruct;
101   LUstruct_t             LUstruct;
102   int                    StatPrint;
103   SOLVEstruct_t          SOLVEstruct;
104   fact_t                 FactPattern;
105   MPI_Comm               comm_superlu;
106   PetscScalar           *val;
107   PetscBool              matsolve_iscalled, matmatsolve_iscalled;
108   PetscBool              CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
109 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
110   sScalePermstruct_t sScalePermstruct;
111   sLUstruct_t        sLUstruct;
112   sSOLVEstruct_t     sSOLVEstruct;
113   float             *sval;
114   PetscBool          singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */
115   float             *sbptr;
116 #endif
117 } Mat_SuperLU_DIST;
118 
119 static PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
120 {
121   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
122 
123   PetscFunctionBegin;
124   PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
125   PetscFunctionReturn(PETSC_SUCCESS);
126 }
127 
128 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
129 {
130   PetscFunctionBegin;
131   PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
132   PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
133   PetscFunctionReturn(PETSC_SUCCESS);
134 }
135 
136 /*  This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
137 typedef struct {
138   MPI_Comm   comm;
139   PetscBool  busy;
140   gridinfo_t grid;
141 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
142   PetscBool    use3d;
143   gridinfo3d_t grid3d;
144 #endif
145 } PetscSuperLU_DIST;
146 
147 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
148 
149 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
150 {
151   PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;
152 
153   PetscFunctionBegin;
154   PetscCheckReturnMPI(keyval == Petsc_Superlu_dist_keyval, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
155 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
156   if (context->use3d) {
157     PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
158   } else
159 #endif
160     PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
161   PetscCallMPIReturnMPI(MPI_Comm_free(&context->comm));
162   PetscCallReturnMPI(PetscFree(context));
163   PetscFunctionReturn(MPI_SUCCESS);
164 }
165 
166 /*
167    Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
168    users who do not destroy all PETSc objects before PetscFinalize().
169 
170    The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_DeleteFn()
171    can still check that the keyval associated with the MPI communicator is correct when the MPI
172    communicator is destroyed.
173 
174    This is called in PetscFinalize()
175 */
176 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
177 {
178   PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
179 
180   PetscFunctionBegin;
181   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
186 {
187   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
188 
189   PetscFunctionBegin;
190 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
191   PetscCall(PetscFree(lu->sbptr));
192 #endif
193   if (lu->CleanUpSuperLU_Dist) {
194     /* Deallocate SuperLU_DIST storage */
195     PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
196     if (lu->options.SolveInitialized) {
197 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
198       if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
199       else
200 #endif
201         PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
202     }
203 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
204     if (lu->use3d) {
205   #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
206       if (lu->singleprecision) {
207         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
208         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d));
209         PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
210         PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
211       } else
212   #endif
213       {
214         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
215         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
216         PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
217         PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
218       }
219     } else
220 #endif
221 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
222       if (lu->singleprecision) {
223       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct));
224       PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
225       PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
226     } else
227 #endif
228     {
229       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
230       PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
231       PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
232     }
233     /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */
234     if (lu->comm_superlu) {
235 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
236       if (lu->use3d) {
237         PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
238       } else
239 #endif
240         PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
241     }
242   }
243   /*
244    * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
245    * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
246    * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist
247    * is off, and the communicator was not released or marked as "not busy " in the old code.
248    * Here we try to release comm regardless.
249   */
250   if (lu->comm_superlu) {
251     PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
252   } else {
253     PetscSuperLU_DIST *context;
254     MPI_Comm           comm;
255     PetscMPIInt        flg;
256 
257     PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
258     PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
259     if (flg) context->busy = PETSC_FALSE;
260   }
261 
262   PetscCall(PetscFree(A->data));
263   /* clear composed functions */
264   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
265   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));
266   PetscFunctionReturn(PETSC_SUCCESS);
267 }
268 
269 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
270 {
271   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
272   PetscInt          m  = A->rmap->n;
273   SuperLUStat_t     stat;
274   PetscReal         berr[1];
275   PetscScalar      *bptr = NULL;
276 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
277   float sberr[1];
278 #endif
279   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
280   static PetscBool cite = PETSC_FALSE;
281 
282   PetscFunctionBegin;
283   PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
284   PetscCall(PetscCitationsRegister("@article{lidemmel03,\n  author = {Xiaoye S. Li and James W. Demmel},\n  title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n           Solver for Unsymmetric Linear Systems},\n  journal = {ACM "
285                                    "Trans. Mathematical Software},\n  volume = {29},\n  number = {2},\n  pages = {110-140},\n  year = 2003\n}\n",
286                                    &cite));
287 
288   if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
289     /* see comments in MatMatSolve() */
290 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
291     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
292     else
293 #endif
294       PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
295     lu->options.SolveInitialized = NO;
296   }
297   PetscCall(VecCopy(b_mpi, x));
298   PetscCall(VecGetArray(x, &bptr));
299 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
300   if (lu->singleprecision) {
301     PetscInt n;
302     PetscCall(VecGetLocalSize(x, &n));
303     if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr));
304     for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
305   }
306 #endif
307 
308   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
309 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
310   if (lu->use3d) {
311   #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
312     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
313     else
314   #endif
315       PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
316     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx3d fails, info: %d", info);
317   } else
318 #endif
319   {
320 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
321     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
322     else
323 #endif
324       PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
325     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx fails, info: %d", info);
326   }
327   if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
328   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
329 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
330   if (lu->singleprecision) {
331     PetscInt n;
332     PetscCall(VecGetLocalSize(x, &n));
333     for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i];
334   }
335 #endif
336   PetscCall(VecRestoreArray(x, &bptr));
337   lu->matsolve_iscalled    = PETSC_TRUE;
338   lu->matmatsolve_iscalled = PETSC_FALSE;
339   PetscFunctionReturn(PETSC_SUCCESS);
340 }
341 
342 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
343 {
344   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
345   PetscInt          m  = A->rmap->n, nrhs;
346   SuperLUStat_t     stat;
347   PetscReal         berr[1];
348   PetscScalar      *bptr;
349   int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
350   PetscBool         flg;
351 
352   PetscFunctionBegin;
353 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
354   PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single");
355 #endif
356   PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
357   PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
358   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
359   if (X != B_mpi) {
360     PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
361     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
362   }
363 
364   if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
365     /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
366        thus destroy it and create a new SOLVEstruct.
367        Otherwise it may result in memory corruption or incorrect solution
368        See src/mat/tests/ex125.c */
369     PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
370     lu->options.SolveInitialized = NO;
371   }
372   if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));
373 
374   PetscCall(MatGetSize(B_mpi, NULL, &nrhs));
375 
376   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
377   PetscCall(MatDenseGetArray(X, &bptr));
378 
379 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
380   if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
381   else
382 #endif
383     PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
384   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
385   PetscCall(MatDenseRestoreArray(X, &bptr));
386 
387   if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
388   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
389   lu->matsolve_iscalled    = PETSC_FALSE;
390   lu->matmatsolve_iscalled = PETSC_TRUE;
391   PetscFunctionReturn(PETSC_SUCCESS);
392 }
393 
394 /*
395   input:
396    F:        numeric Cholesky factor
397   output:
398    nneg:     total number of negative pivots
399    nzero:    total number of zero pivots
400    npos:     (global dimension of F) - nneg - nzero
401 */
402 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
403 {
404   Mat_SuperLU_DIST *lu    = (Mat_SuperLU_DIST *)F->data;
405   PetscScalar      *diagU = NULL;
406   PetscInt          M, i, neg = 0, zero = 0, pos = 0;
407   PetscReal         r;
408 
409   PetscFunctionBegin;
410 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
411   PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single");
412 #endif
413   PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
414   PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
415   PetscCall(MatGetSize(F, &M, NULL));
416   PetscCall(PetscMalloc1(M, &diagU));
417   PetscCall(MatSuperluDistGetDiagU(F, diagU));
418   for (i = 0; i < M; i++) {
419 #if defined(PETSC_USE_COMPLEX)
420     r = PetscImaginaryPart(diagU[i]) / 10.0;
421     PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
422     r = PetscRealPart(diagU[i]);
423 #else
424     r = diagU[i];
425 #endif
426     if (r > 0) {
427       pos++;
428     } else if (r < 0) {
429       neg++;
430     } else zero++;
431   }
432 
433   PetscCall(PetscFree(diagU));
434   if (nneg) *nneg = neg;
435   if (nzero) *nzero = zero;
436   if (npos) *npos = pos;
437   PetscFunctionReturn(PETSC_SUCCESS);
438 }
439 
440 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
441 {
442   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
443   Mat                Aloc;
444   const PetscScalar *av;
445   const PetscInt    *ai = NULL, *aj = NULL;
446   PetscInt           nz, dummy;
447   int                sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
448   SuperLUStat_t      stat;
449   PetscReal         *berr = 0;
450 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
451   float *sberr = 0;
452 #endif
453   PetscBool ismpiaij, isseqaij, flg;
454 
455   PetscFunctionBegin;
456   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
457   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
458   if (ismpiaij) {
459     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
460   } else if (isseqaij) {
461     PetscCall(PetscObjectReference((PetscObject)A));
462     Aloc = A;
463   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
464 
465   PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
466   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
467   PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
468   nz = ai[Aloc->rmap->n];
469 
470   /* Allocations for A_sup */
471   if (lu->options.Fact == DOFACT) { /* first numeric factorization */
472 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
473     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
474     else
475 #endif
476       PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
477   } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
478     if (lu->FactPattern == SamePattern_SameRowPerm) {
479       lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
480     } else if (lu->FactPattern == SamePattern) {
481 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
482       if (lu->use3d) {
483   #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
484         if (lu->singleprecision) {
485           PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
486           PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
487         } else
488   #endif
489         {
490           PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
491           PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
492         }
493       } else
494 #endif
495 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
496         if (lu->singleprecision)
497         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
498       else
499 #endif
500         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
501       lu->options.Fact = SamePattern;
502     } else if (lu->FactPattern == DOFACT) {
503       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
504 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
505       if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
506       else
507 #endif
508         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
509       lu->options.Fact = DOFACT;
510 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
511       if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
512       else
513 #endif
514         PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
515     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
516   }
517 
518   /* Copy AIJ matrix to superlu_dist matrix */
519   PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
520   PetscCall(PetscArraycpy(lu->col, aj, nz));
521 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
522   if (lu->singleprecision)
523     for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
524   else
525 #endif
526     PetscCall(PetscArraycpy(lu->val, av, nz));
527   PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
528   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
529   PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
530   PetscCall(MatDestroy(&Aloc));
531 
532   /* Create and setup A_sup */
533   if (lu->options.Fact == DOFACT) {
534 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
535     if (lu->singleprecision)
536       PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE));
537     else
538 #endif
539       PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
540   }
541 
542   /* Factor the matrix. */
543   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
544 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
545   if (lu->use3d) {
546   #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
547     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
548     else
549   #endif
550       PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
551   } else
552 #endif
553   {
554 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
555     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
556     else
557 #endif
558       PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
559   }
560   if (sinfo > 0) {
561     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
562     if (sinfo <= lu->A_sup.ncol) {
563       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
564       PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
565     } else if (sinfo > lu->A_sup.ncol) {
566       /*
567        number of bytes allocated when memory allocation
568        failure occurred, plus A->ncol.
569        */
570       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
571       PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
572     }
573   } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);
574 
575   if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ }
576   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
577   F->assembled     = PETSC_TRUE;
578   F->preallocated  = PETSC_TRUE;
579   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
580   PetscFunctionReturn(PETSC_SUCCESS);
581 }
582 
583 /* Note the Petsc r and c permutations are ignored */
584 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
585 {
586   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
587   PetscInt           M = A->rmap->N, N = A->cmap->N, indx;
588   PetscMPIInt        size, mpiflg;
589   PetscBool          flg, set;
590   const char        *colperm[]     = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
591   const char        *rowperm[]     = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
592   const char        *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
593   MPI_Comm           comm;
594   PetscSuperLU_DIST *context = NULL;
595 
596   PetscFunctionBegin;
597   /* Set options to F */
598   PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
599   PetscCallMPI(MPI_Comm_size(comm, &size));
600 
601   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
602   PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
603   if (set && !flg) lu->options.Equil = NO;
604 
605   PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
606   if (flg) {
607     switch (indx) {
608     case 0:
609       lu->options.RowPerm = NOROWPERM;
610       break;
611     case 1:
612       lu->options.RowPerm = LargeDiag_MC64;
613       break;
614     case 2:
615       lu->options.RowPerm = LargeDiag_AWPM;
616       break;
617     case 3:
618       lu->options.RowPerm = MY_PERMR;
619       break;
620     default:
621       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
622     }
623   }
624 
625   PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
626   if (flg) {
627     switch (indx) {
628     case 0:
629       lu->options.ColPerm = NATURAL;
630       break;
631     case 1:
632       lu->options.ColPerm = MMD_AT_PLUS_A;
633       break;
634     case 2:
635       lu->options.ColPerm = MMD_ATA;
636       break;
637     case 3:
638       lu->options.ColPerm = METIS_AT_PLUS_A;
639       break;
640     case 4:
641       lu->options.ColPerm = PARMETIS; /* only works for np>1 */
642       break;
643     default:
644       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
645     }
646   }
647 
648   lu->options.ReplaceTinyPivot = NO;
649   PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
650   if (set && flg) lu->options.ReplaceTinyPivot = YES;
651 
652   lu->options.ParSymbFact = NO;
653   PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
654   if (set && flg && size > 1) {
655 #if defined(PETSC_HAVE_PARMETIS)
656     lu->options.ParSymbFact = YES;
657     lu->options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
658 #else
659     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
660 #endif
661   }
662 
663   lu->FactPattern = SamePattern;
664   PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
665   if (flg) {
666     switch (indx) {
667     case 0:
668       lu->FactPattern = SamePattern;
669       break;
670     case 1:
671       lu->FactPattern = SamePattern_SameRowPerm;
672       break;
673     case 2:
674       lu->FactPattern = DOFACT;
675       break;
676     }
677   }
678 
679   lu->options.IterRefine = NOREFINE;
680   PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
681   if (set) {
682     if (flg) lu->options.IterRefine = SLU_DOUBLE;
683     else lu->options.IterRefine = NOREFINE;
684   }
685 
686   if (PetscLogPrintInfo) lu->options.PrintStat = YES;
687   else lu->options.PrintStat = NO;
688   PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
689   PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));
690 
691   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg));
692   if (!mpiflg || context->busy) { /* additional options */
693     if (!mpiflg) {
694       PetscCall(PetscNew(&context));
695       context->busy = PETSC_TRUE;
696       PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
697       PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
698     } else {
699       PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
700     }
701 
702     /* Default number of process columns and rows */
703     lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
704     if (!lu->nprow) lu->nprow = 1;
705     while (lu->nprow > 0) {
706       lu->npcol = (int_t)(size / lu->nprow);
707       if (size == lu->nprow * lu->npcol) break;
708       lu->nprow--;
709     }
710 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
711     lu->use3d = PETSC_FALSE;
712     lu->npdep = 1;
713 #endif
714 
715 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
716     PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
717     if (lu->use3d) {
718       PetscInt t;
719       PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
720       t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
721       PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
722       if (lu->npdep > 1) {
723         lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
724         if (!lu->nprow) lu->nprow = 1;
725         while (lu->nprow > 0) {
726           lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
727           if (size == lu->nprow * lu->npcol * lu->npdep) break;
728           lu->nprow--;
729         }
730       }
731     }
732 #endif
733     PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
734     PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
735 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
736     PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
737 #else
738     PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
739 #endif
740     /* end of adding additional options */
741 
742 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
743     if (lu->use3d) {
744       PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, (int)lu->npdep, &lu->grid3d));
745       if (context) {
746         context->grid3d = lu->grid3d;
747         context->use3d  = lu->use3d;
748       }
749     } else {
750 #endif
751       PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, &lu->grid));
752       if (context) context->grid = lu->grid;
753 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
754     }
755 #endif
756     PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
757     if (mpiflg) {
758       PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n"));
759     } else {
760       PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
761     }
762   } else { /* (mpiflg && !context->busy) */
763     PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n"));
764     context->busy = PETSC_TRUE;
765     lu->grid      = context->grid;
766   }
767   PetscOptionsEnd();
768 
769   /* Initialize ScalePermstruct and LUstruct. */
770 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
771   if (lu->singleprecision) {
772     PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct));
773     PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct));
774   } else
775 #endif
776   {
777     PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
778     PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
779   }
780   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
781   F->ops->solve           = MatSolve_SuperLU_DIST;
782   F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
783   F->ops->getinertia      = NULL;
784 
785   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
786   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
790 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
791 {
792   PetscFunctionBegin;
793   PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
794   F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
795   PetscFunctionReturn(PETSC_SUCCESS);
796 }
797 
798 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
799 {
800   PetscFunctionBegin;
801   *type = MATSOLVERSUPERLU_DIST;
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
805 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
806 {
807   Mat_SuperLU_DIST      *lu = (Mat_SuperLU_DIST *)A->data;
808   superlu_dist_options_t options;
809 
810   PetscFunctionBegin;
811   /* check if matrix is superlu_dist type */
812   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);
813 
814   options = lu->options;
815   PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
816   /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
817    * format spec for int64_t is set to %d for whatever reason */
818   PetscCall(PetscViewerASCIIPrintf(viewer, "  Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
819 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
820   if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
821 #endif
822 
823   PetscCall(PetscViewerASCIIPrintf(viewer, "  Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
824   PetscCall(PetscViewerASCIIPrintf(viewer, "  Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
825   PetscCall(PetscViewerASCIIPrintf(viewer, "  Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
826   PetscCall(PetscViewerASCIIPrintf(viewer, "  Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));
827 
828   switch (options.RowPerm) {
829   case NOROWPERM:
830     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation NOROWPERM\n"));
831     break;
832   case LargeDiag_MC64:
833     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_MC64\n"));
834     break;
835   case LargeDiag_AWPM:
836     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_AWPM\n"));
837     break;
838   case MY_PERMR:
839     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation MY_PERMR\n"));
840     break;
841   default:
842     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
843   }
844 
845   switch (options.ColPerm) {
846   case NATURAL:
847     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation NATURAL\n"));
848     break;
849   case MMD_AT_PLUS_A:
850     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_AT_PLUS_A\n"));
851     break;
852   case MMD_ATA:
853     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_ATA\n"));
854     break;
855   /*  Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
856   case METIS_AT_PLUS_A:
857     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation METIS_AT_PLUS_A\n"));
858     break;
859   case PARMETIS:
860     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation PARMETIS\n"));
861     break;
862   default:
863     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
864   }
865 
866   PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));
867 
868   if (lu->FactPattern == SamePattern) {
869     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern\n"));
870   } else if (lu->FactPattern == SamePattern_SameRowPerm) {
871     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern_SameRowPerm\n"));
872   } else if (lu->FactPattern == DOFACT) {
873     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization DOFACT\n"));
874   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
875 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
876   if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using SuperLU_DIST in single precision\n"));
877 #endif
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
882 {
883   PetscBool         iascii;
884   PetscViewerFormat format;
885 
886   PetscFunctionBegin;
887   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
888   if (iascii) {
889     PetscCall(PetscViewerGetFormat(viewer, &format));
890     if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
891   }
892   PetscFunctionReturn(PETSC_SUCCESS);
893 }
894 
895 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
896 {
897   Mat                    B;
898   Mat_SuperLU_DIST      *lu;
899   PetscInt               M = A->rmap->N, N = A->cmap->N;
900   PetscMPIInt            size;
901   superlu_dist_options_t options;
902   PetscBool              flg;
903   char                   string[16];
904 
905   PetscFunctionBegin;
906   /* Create the factorization matrix */
907   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
908   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
909   PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
910   PetscCall(MatSetUp(B));
911   B->ops->getinfo = MatGetInfo_External;
912   B->ops->view    = MatView_SuperLU_DIST;
913   B->ops->destroy = MatDestroy_SuperLU_DIST;
914 
915   /* Set the default input options:
916      options.Fact              = DOFACT;
917      options.Equil             = YES;
918      options.ParSymbFact       = NO;
919      options.ColPerm           = METIS_AT_PLUS_A;
920      options.RowPerm           = LargeDiag_MC64;
921      options.ReplaceTinyPivot  = YES;
922      options.IterRefine        = DOUBLE;
923      options.Trans             = NOTRANS;
924      options.SolveInitialized  = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
925      options.RefineInitialized = NO;
926      options.PrintStat         = YES;
927      options.SymPattern        = NO;
928   */
929   set_default_options_dist(&options);
930 
931   B->trivialsymbolic = PETSC_TRUE;
932   if (ftype == MAT_FACTOR_LU) {
933     B->factortype            = MAT_FACTOR_LU;
934     B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
935   } else {
936     B->factortype                  = MAT_FACTOR_CHOLESKY;
937     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
938     options.SymPattern             = YES;
939   }
940 
941   /* set solvertype */
942   PetscCall(PetscFree(B->solvertype));
943   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));
944 
945   PetscCall(PetscNew(&lu));
946   B->data = lu;
947   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
948 
949   lu->options              = options;
950   lu->options.Fact         = DOFACT;
951   lu->matsolve_iscalled    = PETSC_FALSE;
952   lu->matmatsolve_iscalled = PETSC_FALSE;
953 
954   PetscCall(PetscOptionsGetString(NULL, NULL, "-pc_precision", string, sizeof(string), &flg));
955   if (flg) {
956     PetscCall(PetscStrcasecmp(string, "single", &flg));
957     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single as option for SuperLU_DIST");
958 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
959     lu->singleprecision = PETSC_TRUE;
960 #endif
961   }
962 
963   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
964   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));
965 
966   *F = B;
967   PetscFunctionReturn(PETSC_SUCCESS);
968 }
969 
970 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
971 {
972   PetscFunctionBegin;
973   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
974   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
975   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
976   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
977   if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
978     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_DeleteFn, &Petsc_Superlu_dist_keyval, NULL));
979     PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
980   }
981   PetscFunctionReturn(PETSC_SUCCESS);
982 }
983 
984 /*MC
985   MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
986 
987   Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch`  to have PETSc installed with SuperLU_DIST
988 
989   Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver
990 
991    Works with `MATAIJ` matrices
992 
993   Options Database Keys:
994 + -mat_superlu_dist_r <n> - number of rows in processor partition
995 . -mat_superlu_dist_c <n> - number of columns in processor partition
996 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
997 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided
998 . -mat_superlu_dist_equil - equilibrate the matrix
999 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
1000 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
1001 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
1002 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
1003 . -mat_superlu_dist_iterrefine - use iterative refinement
1004 . -mat_superlu_dist_printstat - print factorization information
1005 - -pc_precision single - use SuperLU_DIST single precision with PETSc double precision. Currently this does not accept an options prefix, so
1006                          regardless of the `PC` prefix you must use no prefix here
1007 
1008   Level: beginner
1009 
1010   Note:
1011     If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs.
1012 
1013 .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
1014 M*/
1015