xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision b59c00aac33bcee8adab1e4f09d5a24aa820f6bc)
1 /*
2         Provides an interface to the SuperLU_DIST sparse solver
3 */
4 
5 #include <../src/mat/impls/aij/seq/aij.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 #include <petscpkg_version.h>
8 
9 EXTERN_C_BEGIN
10 #if defined(PETSC_USE_COMPLEX)
11   #define CASTDOUBLECOMPLEX     (doublecomplex *)
12   #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
13   #include <superlu_zdefs.h>
14   #define LUstructInit                  zLUstructInit
15   #define ScalePermstructInit           zScalePermstructInit
16   #define ScalePermstructFree           zScalePermstructFree
17   #define LUstructFree                  zLUstructFree
18   #define Destroy_LU                    zDestroy_LU
19   #define ScalePermstruct_t             zScalePermstruct_t
20   #define LUstruct_t                    zLUstruct_t
21   #define SOLVEstruct_t                 zSOLVEstruct_t
22   #define SolveFinalize                 zSolveFinalize
23   #define pGetDiagU                     pzGetDiagU
24   #define pgssvx                        pzgssvx
25   #define allocateA_dist                zallocateA_dist
26   #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
27   #define SLU                           SLU_Z
28   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
29     #define DeAllocLlu_3d              zDeAllocLlu_3d
30     #define DeAllocGlu_3d              zDeAllocGlu_3d
31     #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
32     #define pgssvx3d                   pzgssvx3d
33   #endif
34 #elif defined(PETSC_USE_REAL_SINGLE)
35   #define CASTDOUBLECOMPLEX
36   #define CASTDOUBLECOMPLEXSTAR
37   #include <superlu_sdefs.h>
38   #define LUstructInit                  sLUstructInit
39   #define ScalePermstructInit           sScalePermstructInit
40   #define ScalePermstructFree           sScalePermstructFree
41   #define LUstructFree                  sLUstructFree
42   #define Destroy_LU                    sDestroy_LU
43   #define ScalePermstruct_t             sScalePermstruct_t
44   #define LUstruct_t                    sLUstruct_t
45   #define SOLVEstruct_t                 sSOLVEstruct_t
46   #define SolveFinalize                 sSolveFinalize
47   #define pGetDiagU                     psGetDiagU
48   #define pgssvx                        psgssvx
49   #define allocateA_dist                sallocateA_dist
50   #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
51   #define SLU                           SLU_S
52   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
53     #define DeAllocLlu_3d              sDeAllocLlu_3d
54     #define DeAllocGlu_3d              sDeAllocGlu_3d
55     #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
56     #define pgssvx3d                   psgssvx3d
57   #endif
58 #else
59   #define CASTDOUBLECOMPLEX
60   #define CASTDOUBLECOMPLEXSTAR
61   #include <superlu_ddefs.h>
62   #define LUstructInit                  dLUstructInit
63   #define ScalePermstructInit           dScalePermstructInit
64   #define ScalePermstructFree           dScalePermstructFree
65   #define LUstructFree                  dLUstructFree
66   #define Destroy_LU                    dDestroy_LU
67   #define ScalePermstruct_t             dScalePermstruct_t
68   #define LUstruct_t                    dLUstruct_t
69   #define SOLVEstruct_t                 dSOLVEstruct_t
70   #define SolveFinalize                 dSolveFinalize
71   #define pGetDiagU                     pdGetDiagU
72   #define pgssvx                        pdgssvx
73   #define allocateA_dist                dallocateA_dist
74   #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
75   #define SLU                           SLU_D
76   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
77     #define DeAllocLlu_3d              dDeAllocLlu_3d
78     #define DeAllocGlu_3d              dDeAllocGlu_3d
79     #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
80     #define pgssvx3d                   pdgssvx3d
81   #endif
82 #endif
83 EXTERN_C_END
84 
85 typedef struct {
86   int_t      nprow, npcol, *row, *col;
87   gridinfo_t grid;
88 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
89   PetscBool    use3d;
90   int_t        npdep; /* replication factor, must be power of two */
91   gridinfo3d_t grid3d;
92 #endif
93   superlu_dist_options_t options;
94   SuperMatrix            A_sup;
95   ScalePermstruct_t      ScalePermstruct;
96   LUstruct_t             LUstruct;
97   int                    StatPrint;
98   SOLVEstruct_t          SOLVEstruct;
99   fact_t                 FactPattern;
100   MPI_Comm               comm_superlu;
101   PetscScalar           *val;
102   PetscBool              matsolve_iscalled, matmatsolve_iscalled;
103   PetscBool              CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
104 } Mat_SuperLU_DIST;
105 
106 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
107 {
108   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
109 
110   PetscFunctionBegin;
111   PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
112   PetscFunctionReturn(PETSC_SUCCESS);
113 }
114 
115 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
116 {
117   PetscFunctionBegin;
118   PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
119   PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 /*  This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
124 typedef struct {
125   MPI_Comm   comm;
126   PetscBool  busy;
127   gridinfo_t grid;
128 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
129   PetscBool    use3d;
130   gridinfo3d_t grid3d;
131 #endif
132 } PetscSuperLU_DIST;
133 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
134 
135 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
136 {
137   PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;
138 
139   PetscFunctionBegin;
140   if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
141   PetscCall(PetscInfo(NULL, "Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n"));
142 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
143   if (context->use3d) {
144     PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
145   } else
146 #endif
147     PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
148   PetscCallMPI(MPI_Comm_free(&context->comm));
149   PetscCall(PetscFree(context));
150   PetscFunctionReturn(MPI_SUCCESS);
151 }
152 
153 /*
154    Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
155    users who do not destroy all PETSc objects before PetscFinalize().
156 
157    The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
158    can still check that the keyval associated with the MPI communicator is correct when the MPI
159    communicator is destroyed.
160 
161    This is called in PetscFinalize()
162 */
163 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
164 {
165   PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
166 
167   PetscFunctionBegin;
168   PetscCall(PetscInfo(NULL, "Freeing Petsc_Superlu_dist_keyval\n"));
169   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
174 {
175   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
176 
177   PetscFunctionBegin;
178   if (lu->CleanUpSuperLU_Dist) {
179     /* Deallocate SuperLU_DIST storage */
180     PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
181     if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
182 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
183     if (lu->use3d) {
184       if (lu->grid3d.zscp.Iam == 0) {
185         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
186       } else {
187         PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
188         PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
189       }
190       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
191     } else
192 #endif
193       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
194     PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
195     PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
196 
197     /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */
198     if (lu->comm_superlu) {
199 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
200       if (lu->use3d) {
201         PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
202       } else
203 #endif
204         PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
205     }
206   }
207   /*
208    * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
209    * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
210    * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist
211    * is off, and the communicator was not released or marked as "not busy " in the old code.
212    * Here we try to release comm regardless.
213   */
214   if (lu->comm_superlu) {
215     PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
216   } else {
217     PetscSuperLU_DIST *context;
218     MPI_Comm           comm;
219     PetscMPIInt        flg;
220 
221     PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
222     PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
223     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Communicator does not have expected Petsc_Superlu_dist_keyval attribute");
224     context->busy = PETSC_FALSE;
225   }
226 
227   PetscCall(PetscFree(A->data));
228   /* clear composed functions */
229   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
230   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));
231 
232   PetscFunctionReturn(PETSC_SUCCESS);
233 }
234 
235 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
236 {
237   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
238   PetscInt          m  = A->rmap->n;
239   SuperLUStat_t     stat;
240   PetscReal         berr[1];
241   PetscScalar      *bptr = NULL;
242   int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
243   static PetscBool  cite = PETSC_FALSE;
244 
245   PetscFunctionBegin;
246   PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
247   PetscCall(PetscCitationsRegister("@article{lidemmel03,\n  author = {Xiaoye S. Li and James W. Demmel},\n  title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n           Solver for Unsymmetric Linear Systems},\n  journal = {ACM "
248                                    "Trans. Mathematical Software},\n  volume = {29},\n  number = {2},\n  pages = {110-140},\n  year = 2003\n}\n",
249                                    &cite));
250 
251   if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
252     /* see comments in MatMatSolve() */
253     PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
254     lu->options.SolveInitialized = NO;
255   }
256   PetscCall(VecCopy(b_mpi, x));
257   PetscCall(VecGetArray(x, &bptr));
258 
259   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
260 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
261   if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
262   else
263 #endif
264     PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
265   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
266 
267   if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
268   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
269 
270   PetscCall(VecRestoreArray(x, &bptr));
271   lu->matsolve_iscalled    = PETSC_TRUE;
272   lu->matmatsolve_iscalled = PETSC_FALSE;
273   PetscFunctionReturn(PETSC_SUCCESS);
274 }
275 
276 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
277 {
278   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
279   PetscInt          m  = A->rmap->n, nrhs;
280   SuperLUStat_t     stat;
281   PetscReal         berr[1];
282   PetscScalar      *bptr;
283   int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
284   PetscBool         flg;
285 
286   PetscFunctionBegin;
287   PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
288   PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
289   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
290   if (X != B_mpi) {
291     PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
292     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
293   }
294 
295   if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
296     /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
297        thus destroy it and create a new SOLVEstruct.
298        Otherwise it may result in memory corruption or incorrect solution
299        See src/mat/tests/ex125.c */
300     PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
301     lu->options.SolveInitialized = NO;
302   }
303   if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));
304 
305   PetscCall(MatGetSize(B_mpi, NULL, &nrhs));
306 
307   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
308   PetscCall(MatDenseGetArray(X, &bptr));
309 
310 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
311   if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
312   else
313 #endif
314     PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
315 
316   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
317   PetscCall(MatDenseRestoreArray(X, &bptr));
318 
319   if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
320   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
321   lu->matsolve_iscalled    = PETSC_FALSE;
322   lu->matmatsolve_iscalled = PETSC_TRUE;
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 /*
327   input:
328    F:        numeric Cholesky factor
329   output:
330    nneg:     total number of negative pivots
331    nzero:    total number of zero pivots
332    npos:     (global dimension of F) - nneg - nzero
333 */
334 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
335 {
336   Mat_SuperLU_DIST *lu    = (Mat_SuperLU_DIST *)F->data;
337   PetscScalar      *diagU = NULL;
338   PetscInt          M, i, neg = 0, zero = 0, pos = 0;
339   PetscReal         r;
340 
341   PetscFunctionBegin;
342   PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
343   PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
344   PetscCall(MatGetSize(F, &M, NULL));
345   PetscCall(PetscMalloc1(M, &diagU));
346   PetscCall(MatSuperluDistGetDiagU(F, diagU));
347   for (i = 0; i < M; i++) {
348 #if defined(PETSC_USE_COMPLEX)
349     r = PetscImaginaryPart(diagU[i]) / 10.0;
350     PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
351     r = PetscRealPart(diagU[i]);
352 #else
353     r = diagU[i];
354 #endif
355     if (r > 0) {
356       pos++;
357     } else if (r < 0) {
358       neg++;
359     } else zero++;
360   }
361 
362   PetscCall(PetscFree(diagU));
363   if (nneg) *nneg = neg;
364   if (nzero) *nzero = zero;
365   if (npos) *npos = pos;
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
370 {
371   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
372   Mat                Aloc;
373   const PetscScalar *av;
374   const PetscInt    *ai = NULL, *aj = NULL;
375   PetscInt           nz, dummy;
376   int                sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
377   SuperLUStat_t      stat;
378   PetscReal         *berr = 0;
379   PetscBool          ismpiaij, isseqaij, flg;
380 
381   PetscFunctionBegin;
382   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
383   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
384   if (ismpiaij) {
385     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
386   } else if (isseqaij) {
387     PetscCall(PetscObjectReference((PetscObject)A));
388     Aloc = A;
389   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
390 
391   PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
392   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
393   PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
394   nz = ai[Aloc->rmap->n];
395 
396   /* Allocations for A_sup */
397   if (lu->options.Fact == DOFACT) { /* first numeric factorization */
398     PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
399   } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
400     if (lu->FactPattern == SamePattern_SameRowPerm) {
401       lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
402     } else if (lu->FactPattern == SamePattern) {
403 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
404       if (lu->use3d) {
405         if (lu->grid3d.zscp.Iam == 0) {
406           PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
407           PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
408         } else {
409           PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
410           PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
411         }
412       } else
413 #endif
414         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
415       lu->options.Fact = SamePattern;
416     } else if (lu->FactPattern == DOFACT) {
417       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
418       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
419       lu->options.Fact = DOFACT;
420       PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
421     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
422   }
423 
424   /* Copy AIJ matrix to superlu_dist matrix */
425   PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
426   PetscCall(PetscArraycpy(lu->col, aj, nz));
427   PetscCall(PetscArraycpy(lu->val, av, nz));
428   PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
429   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
430   PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
431   PetscCall(MatDestroy(&Aloc));
432 
433   /* Create and setup A_sup */
434   if (lu->options.Fact == DOFACT) {
435     PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
436   }
437 
438   /* Factor the matrix. */
439   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
440 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
441   if (lu->use3d) {
442     PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
443   } else
444 #endif
445     PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
446   if (sinfo > 0) {
447     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
448     if (sinfo <= lu->A_sup.ncol) {
449       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
450       PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
451     } else if (sinfo > lu->A_sup.ncol) {
452       /*
453        number of bytes allocated when memory allocation
454        failure occurred, plus A->ncol.
455        */
456       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
457       PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
458     }
459   } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);
460 
461   if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ }
462   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
463   F->assembled     = PETSC_TRUE;
464   F->preallocated  = PETSC_TRUE;
465   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
466   PetscFunctionReturn(PETSC_SUCCESS);
467 }
468 
469 /* Note the Petsc r and c permutations are ignored */
470 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
471 {
472   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
473   PetscInt           M = A->rmap->N, N = A->cmap->N, indx;
474   PetscMPIInt        size, mpiflg;
475   PetscBool          flg, set;
476   const char        *colperm[]     = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
477   const char        *rowperm[]     = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
478   const char        *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
479   MPI_Comm           comm;
480   PetscSuperLU_DIST *context = NULL;
481 
482   PetscFunctionBegin;
483   /* Set options to F */
484   PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
485   PetscCallMPI(MPI_Comm_size(comm, &size));
486 
487   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
488   PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
489   if (set && !flg) lu->options.Equil = NO;
490 
491   PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
492   if (flg) {
493     switch (indx) {
494     case 0:
495       lu->options.RowPerm = NOROWPERM;
496       break;
497     case 1:
498       lu->options.RowPerm = LargeDiag_MC64;
499       break;
500     case 2:
501       lu->options.RowPerm = LargeDiag_AWPM;
502       break;
503     case 3:
504       lu->options.RowPerm = MY_PERMR;
505       break;
506     default:
507       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
508     }
509   }
510 
511   PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
512   if (flg) {
513     switch (indx) {
514     case 0:
515       lu->options.ColPerm = NATURAL;
516       break;
517     case 1:
518       lu->options.ColPerm = MMD_AT_PLUS_A;
519       break;
520     case 2:
521       lu->options.ColPerm = MMD_ATA;
522       break;
523     case 3:
524       lu->options.ColPerm = METIS_AT_PLUS_A;
525       break;
526     case 4:
527       lu->options.ColPerm = PARMETIS; /* only works for np>1 */
528       break;
529     default:
530       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
531     }
532   }
533 
534   lu->options.ReplaceTinyPivot = NO;
535   PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
536   if (set && flg) lu->options.ReplaceTinyPivot = YES;
537 
538   lu->options.ParSymbFact = NO;
539   PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
540   if (set && flg && size > 1) {
541 #if defined(PETSC_HAVE_PARMETIS)
542     lu->options.ParSymbFact = YES;
543     lu->options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
544 #else
545     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
546 #endif
547   }
548 
549   lu->FactPattern = SamePattern;
550   PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
551   if (flg) {
552     switch (indx) {
553     case 0:
554       lu->FactPattern = SamePattern;
555       break;
556     case 1:
557       lu->FactPattern = SamePattern_SameRowPerm;
558       break;
559     case 2:
560       lu->FactPattern = DOFACT;
561       break;
562     }
563   }
564 
565   lu->options.IterRefine = NOREFINE;
566   PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
567   if (set) {
568     if (flg) lu->options.IterRefine = SLU_DOUBLE;
569     else lu->options.IterRefine = NOREFINE;
570   }
571 
572   if (PetscLogPrintInfo) lu->options.PrintStat = YES;
573   else lu->options.PrintStat = NO;
574   PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
575   PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));
576 
577   /* Additional options for special cases */
578   if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
579     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, (void *)0));
580     PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
581   }
582   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg));
583   if (!mpiflg || context->busy) { /* additional options */
584     if (!mpiflg) {
585       PetscCall(PetscNew(&context));
586       context->busy = PETSC_TRUE;
587       PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
588       PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
589     } else {
590       PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
591     }
592 
593     /* Default number of process columns and rows */
594     lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
595     if (!lu->nprow) lu->nprow = 1;
596     while (lu->nprow > 0) {
597       lu->npcol = (int_t)(size / lu->nprow);
598       if (size == lu->nprow * lu->npcol) break;
599       lu->nprow--;
600     }
601 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
602     lu->use3d = PETSC_FALSE;
603     lu->npdep = 1;
604 #endif
605 
606 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
607     PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
608     PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation");
609     if (lu->use3d) {
610       PetscInt t;
611       PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
612       t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
613       PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
614       if (lu->npdep > 1) {
615         lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
616         if (!lu->nprow) lu->nprow = 1;
617         while (lu->nprow > 0) {
618           lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
619           if (size == lu->nprow * lu->npcol * lu->npdep) break;
620           lu->nprow--;
621         }
622       }
623     }
624 #endif
625     PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
626     PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
627 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
628     PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
629 #else
630     PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
631 #endif
632     /* end of adding additional options */
633 
634 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
635     if (lu->use3d) {
636       PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d));
637       if (context) {
638         context->grid3d = lu->grid3d;
639         context->use3d  = lu->use3d;
640       }
641     } else {
642 #endif
643       PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
644       if (context) context->grid = lu->grid;
645 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
646     }
647 #endif
648     PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
649     if (mpiflg) {
650       PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n"));
651     } else {
652       PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
653     }
654   } else { /* (mpiflg && !context->busy) */
655     PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute."));
656     context->busy = PETSC_TRUE;
657     lu->grid      = context->grid;
658   }
659   PetscOptionsEnd();
660 
661   /* Initialize ScalePermstruct and LUstruct. */
662   PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
663   PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
664   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
665   F->ops->solve           = MatSolve_SuperLU_DIST;
666   F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
667   F->ops->getinertia      = NULL;
668 
669   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
670   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
671   PetscFunctionReturn(PETSC_SUCCESS);
672 }
673 
674 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
675 {
676   PetscFunctionBegin;
677   PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
678   F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
679   PetscFunctionReturn(PETSC_SUCCESS);
680 }
681 
682 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
683 {
684   PetscFunctionBegin;
685   *type = MATSOLVERSUPERLU_DIST;
686   PetscFunctionReturn(PETSC_SUCCESS);
687 }
688 
689 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
690 {
691   Mat_SuperLU_DIST      *lu = (Mat_SuperLU_DIST *)A->data;
692   superlu_dist_options_t options;
693 
694   PetscFunctionBegin;
695   /* check if matrix is superlu_dist type */
696   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);
697 
698   options = lu->options;
699   PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
700   /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
701    * format spec for int64_t is set to %d for whatever reason */
702   PetscCall(PetscViewerASCIIPrintf(viewer, "  Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
703 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
704   if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
705 #endif
706 
707   PetscCall(PetscViewerASCIIPrintf(viewer, "  Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
708   PetscCall(PetscViewerASCIIPrintf(viewer, "  Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
709   PetscCall(PetscViewerASCIIPrintf(viewer, "  Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
710   PetscCall(PetscViewerASCIIPrintf(viewer, "  Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));
711 
712   switch (options.RowPerm) {
713   case NOROWPERM:
714     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation NOROWPERM\n"));
715     break;
716   case LargeDiag_MC64:
717     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_MC64\n"));
718     break;
719   case LargeDiag_AWPM:
720     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_AWPM\n"));
721     break;
722   case MY_PERMR:
723     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation MY_PERMR\n"));
724     break;
725   default:
726     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
727   }
728 
729   switch (options.ColPerm) {
730   case NATURAL:
731     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation NATURAL\n"));
732     break;
733   case MMD_AT_PLUS_A:
734     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_AT_PLUS_A\n"));
735     break;
736   case MMD_ATA:
737     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_ATA\n"));
738     break;
739   /*  Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
740   case METIS_AT_PLUS_A:
741     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation METIS_AT_PLUS_A\n"));
742     break;
743   case PARMETIS:
744     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation PARMETIS\n"));
745     break;
746   default:
747     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
748   }
749 
750   PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));
751 
752   if (lu->FactPattern == SamePattern) {
753     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern\n"));
754   } else if (lu->FactPattern == SamePattern_SameRowPerm) {
755     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern_SameRowPerm\n"));
756   } else if (lu->FactPattern == DOFACT) {
757     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization DOFACT\n"));
758   } else {
759     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
760   }
761   PetscFunctionReturn(PETSC_SUCCESS);
762 }
763 
764 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
765 {
766   PetscBool         iascii;
767   PetscViewerFormat format;
768 
769   PetscFunctionBegin;
770   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
771   if (iascii) {
772     PetscCall(PetscViewerGetFormat(viewer, &format));
773     if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
774   }
775   PetscFunctionReturn(PETSC_SUCCESS);
776 }
777 
778 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
779 {
780   Mat                    B;
781   Mat_SuperLU_DIST      *lu;
782   PetscInt               M = A->rmap->N, N = A->cmap->N;
783   PetscMPIInt            size;
784   superlu_dist_options_t options;
785 
786   PetscFunctionBegin;
787   /* Create the factorization matrix */
788   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
789   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
790   PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
791   PetscCall(MatSetUp(B));
792   B->ops->getinfo = MatGetInfo_External;
793   B->ops->view    = MatView_SuperLU_DIST;
794   B->ops->destroy = MatDestroy_SuperLU_DIST;
795 
796   /* Set the default input options:
797      options.Fact              = DOFACT;
798      options.Equil             = YES;
799      options.ParSymbFact       = NO;
800      options.ColPerm           = METIS_AT_PLUS_A;
801      options.RowPerm           = LargeDiag_MC64;
802      options.ReplaceTinyPivot  = YES;
803      options.IterRefine        = DOUBLE;
804      options.Trans             = NOTRANS;
805      options.SolveInitialized  = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
806      options.RefineInitialized = NO;
807      options.PrintStat         = YES;
808      options.SymPattern        = NO;
809   */
810   set_default_options_dist(&options);
811 
812   B->trivialsymbolic = PETSC_TRUE;
813   if (ftype == MAT_FACTOR_LU) {
814     B->factortype            = MAT_FACTOR_LU;
815     B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
816   } else {
817     B->factortype                  = MAT_FACTOR_CHOLESKY;
818     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
819     options.SymPattern             = YES;
820   }
821 
822   /* set solvertype */
823   PetscCall(PetscFree(B->solvertype));
824   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));
825 
826   PetscCall(PetscNew(&lu));
827   B->data = lu;
828   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
829 
830   lu->options              = options;
831   lu->options.Fact         = DOFACT;
832   lu->matsolve_iscalled    = PETSC_FALSE;
833   lu->matmatsolve_iscalled = PETSC_FALSE;
834 
835   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
836   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));
837 
838   *F = B;
839   PetscFunctionReturn(PETSC_SUCCESS);
840 }
841 
842 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
843 {
844   PetscFunctionBegin;
845   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
846   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
847   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
848   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
849   PetscFunctionReturn(PETSC_SUCCESS);
850 }
851 
852 /*MC
853   MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
854 
855   Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch`  to have PETSc installed with SuperLU_DIST
856 
857   Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver
858 
859    Works with `MATAIJ` matrices
860 
861   Options Database Keys:
862 + -mat_superlu_dist_r <n> - number of rows in processor partition
863 . -mat_superlu_dist_c <n> - number of columns in processor partition
864 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
865 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided
866 . -mat_superlu_dist_equil - equilibrate the matrix
867 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
868 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
869 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
870 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
871 . -mat_superlu_dist_iterrefine - use iterative refinement
872 - -mat_superlu_dist_printstat - print factorization information
873 
874   Level: beginner
875 
876   Note:
877     If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs.
878 
879 .seealso: [](chapter_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
880 M*/
881