/*
        Provides an interface to the SuperLU_DIST sparse solver
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscpkg_version.h>

PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef")
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #define CASTDOUBLECOMPLEX     (doublecomplex *)
  #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
  #include <superlu_zdefs.h>
  #define LUstructInit                  zLUstructInit
  #define ScalePermstructInit           zScalePermstructInit
  #define ScalePermstructFree           zScalePermstructFree
  #define LUstructFree                  zLUstructFree
  #define Destroy_LU                    zDestroy_LU
  #define ScalePermstruct_t             zScalePermstruct_t
  #define LUstruct_t                    zLUstruct_t
  #define SOLVEstruct_t                 zSOLVEstruct_t
  #define SolveFinalize                 zSolveFinalize
  #define pGetDiagU                     pzGetDiagU
  #define pgssvx                        pzgssvx
  #define allocateA_dist                zallocateA_dist
  #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
  #define SLU                           SLU_Z
  #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    #define DeAllocLlu_3d              zDeAllocLlu_3d
    #define DeAllocGlu_3d              zDeAllocGlu_3d
    #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
    #define pgssvx3d                   pzgssvx3d
  #endif
#elif defined(PETSC_USE_REAL_SINGLE)
  #define CASTDOUBLECOMPLEX
  #define CASTDOUBLECOMPLEXSTAR
  #include <superlu_sdefs.h>
  #define LUstructInit                  sLUstructInit
  #define ScalePermstructInit           sScalePermstructInit
  #define ScalePermstructFree           sScalePermstructFree
  #define LUstructFree                  sLUstructFree
  #define Destroy_LU                    sDestroy_LU
  #define ScalePermstruct_t             sScalePermstruct_t
  #define LUstruct_t                    sLUstruct_t
  #define SOLVEstruct_t                 sSOLVEstruct_t
  #define SolveFinalize                 sSolveFinalize
  #define pGetDiagU                     psGetDiagU
  #define pgssvx                        psgssvx
  #define allocateA_dist                sallocateA_dist
  #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
  #define SLU                           SLU_S
  #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    #define DeAllocLlu_3d              sDeAllocLlu_3d
    #define DeAllocGlu_3d              sDeAllocGlu_3d
    #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
    #define pgssvx3d                   psgssvx3d
  #endif
#else
  #define CASTDOUBLECOMPLEX
  #define CASTDOUBLECOMPLEXSTAR
  #include <superlu_ddefs.h>
  #define LUstructInit                  dLUstructInit
  #define ScalePermstructInit           dScalePermstructInit
  #define ScalePermstructFree           dScalePermstructFree
  #define LUstructFree                  dLUstructFree
  #define Destroy_LU                    dDestroy_LU
  #define ScalePermstruct_t             dScalePermstruct_t
  #define LUstruct_t                    dLUstruct_t
  #define SOLVEstruct_t                 dSOLVEstruct_t
  #define SolveFinalize                 dSolveFinalize
  #define pGetDiagU                     pdGetDiagU
  #define pgssvx                        pdgssvx
  #define allocateA_dist                dallocateA_dist
  #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
  #define SLU                           SLU_D
  #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    #define DeAllocLlu_3d              dDeAllocLlu_3d
    #define DeAllocGlu_3d              dDeAllocGlu_3d
    #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
    #define pgssvx3d                   pdgssvx3d
  #endif
#endif
EXTERN_C_END
PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()

typedef struct {
  int_t      nprow, npcol, *row, *col;
  gridinfo_t grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool    use3d;
  int_t        npdep; /* replication factor, must be power of two */
  gridinfo3d_t grid3d;
#endif
  superlu_dist_options_t options;
  SuperMatrix            A_sup;
  ScalePermstruct_t      ScalePermstruct;
  LUstruct_t             LUstruct;
  int                    StatPrint;
  SOLVEstruct_t          SOLVEstruct;
  fact_t                 FactPattern;
  MPI_Comm               comm_superlu;
  PetscScalar           *val;
  PetscBool              matsolve_iscalled, matmatsolve_iscalled;
  PetscBool              CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
} Mat_SuperLU_DIST;

PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;

  PetscFunctionBegin;
  PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
  PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
  PetscFunctionReturn(PETSC_SUCCESS);
}
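
/*
   A minimal usage sketch for MatSuperluDistGetDiagU() (illustrative only; assumes A is an
   assembled square AIJ matrix with global size M, and omits error handling and cleanup):

     Mat           F;
     MatFactorInfo finfo;
     PetscScalar  *diagU;

     PetscCall(MatFactorInfoInitialize(&finfo));
     PetscCall(MatGetFactor(A, MATSOLVERSUPERLU_DIST, MAT_FACTOR_LU, &F));
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &finfo));
     PetscCall(MatLUFactorNumeric(F, A, &finfo));
     PetscCall(PetscMalloc1(M, &diagU));
     PetscCall(MatSuperluDistGetDiagU(F, diagU));

   MatGetInertia_SuperLU_DIST() below relies on the same calling sequence internally.
*/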

/* This allows reusing the SuperLU_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
typedef struct {
  MPI_Comm   comm;
  PetscBool  busy;
  gridinfo_t grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool    use3d;
  gridinfo3d_t grid3d;
#endif
} PetscSuperLU_DIST;
static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
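/* The keyval is created lazily in MatLUFactorSymbolic_SuperLU_DIST(); the attribute it guards
   is removed by Petsc_Superlu_dist_keyval_Delete_Fn() when the attached communicator is freed,
   and the keyval itself is released at PetscFinalize() through Petsc_Superlu_dist_keyval_free() */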

PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
{
  PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;

  PetscFunctionBegin;
  if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
  PetscCall(PetscInfo(NULL, "Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n"));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  if (context->use3d) {
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
  } else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
  PetscCallMPI(MPI_Comm_free(&context->comm));
  PetscCall(PetscFree(context));
  PetscFunctionReturn(MPI_SUCCESS);
}

/*
   Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
   users who do not destroy all PETSc objects before PetscFinalize().

   The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
   can still check that the keyval associated with the MPI communicator is correct when the MPI
   communicator is destroyed.

   This is called in PetscFinalize()
*/
static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
{
  PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Freeing Petsc_Superlu_dist_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
    if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      if (lu->grid3d.zscp.Iam == 0) {
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
      } else {
        PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
        PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
      }
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
    } else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));

    /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is, it is not stored in the communicator attribute */
    if (lu->comm_superlu) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
      } else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
    }
  }
  /*
   * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist().
   * lu->CleanUpSuperLU_Dist is turned on in MatLUFactorSymbolic_SuperLU_DIST(). In some use
   * cases a factor matrix is created but never used to solve; lu->CleanUpSuperLU_Dist is then
   * still off, and previously the communicator was neither released nor marked as "not busy".
   * Here we release the communicator regardless.
   */
  if (lu->comm_superlu) {
    PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
  } else {
    PetscSuperLU_DIST *context;
    MPI_Comm           comm;
    PetscMPIInt        flg;

    PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
    PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Communicator does not have expected Petsc_Superlu_dist_keyval attribute");
    context->busy = PETSC_FALSE;
  }

  PetscCall(PetscFree(A->data));
  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));

  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt          m  = A->rmap->n;
  SuperLUStat_t     stat;
  PetscReal         berr[1];
  PetscScalar      *bptr = NULL;
  int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool  cite = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscCitationsRegister("@article{lidemmel03,\n  author = {Xiaoye S. Li and James W. Demmel},\n  title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n           Solver for Unsymmetric Linear Systems},\n  journal = {ACM "
                                   "Trans. Mathematical Software},\n  volume = {29},\n  number = {2},\n  pages = {110-140},\n  year = 2003\n}\n",
                                   &cite));

  if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
    /* see comments in MatMatSolve() */
    PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  PetscCall(VecCopy(b_mpi, x));
  PetscCall(VecGetArray(x, &bptr));

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "p*gssvx() fails, info: %d", info);

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));

  PetscCall(VecRestoreArray(x, &bptr));
  lu->matsolve_iscalled    = PETSC_TRUE;
  lu->matmatsolve_iscalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt          m  = A->rmap->n, nrhs;
  SuperLUStat_t     stat;
  PetscReal         berr[1];
  PetscScalar      *bptr;
  int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool         flg;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
  if (X != B_mpi) {
    PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
  }

  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* the communication pattern in SOLVEstruct was likely not created for MatMatSolve(),
       so destroy it and create a new SOLVEstruct.
       Otherwise it may result in memory corruption or an incorrect solution.
       See src/mat/tests/ex125.c */
    PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));

  PetscCall(MatGetSize(B_mpi, NULL, &nrhs));

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
  PetscCall(MatDenseGetArray(X, &bptr));

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));

  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "p*gssvx() fails, info: %d", info);
  PetscCall(MatDenseRestoreArray(X, &bptr));

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  input:
   F:        numeric Cholesky factor
  output:
   nneg:     total number of negative pivots
   nzero:    total number of zero pivots
   npos:     (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_SuperLU_DIST *lu    = (Mat_SuperLU_DIST *)F->data;
  PetscScalar      *diagU = NULL;
  PetscInt          M, i, neg = 0, zero = 0, pos = 0;
  PetscReal         r;

  PetscFunctionBegin;
  PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
  PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
  PetscCall(MatGetSize(F, &M, NULL));
  PetscCall(PetscMalloc1(M, &diagU));
  PetscCall(MatSuperluDistGetDiagU(F, diagU));
  for (i = 0; i < M; i++) {
#if defined(PETSC_USE_COMPLEX)
    r = PetscImaginaryPart(diagU[i]) / 10.0;
    PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
    r = PetscRealPart(diagU[i]);
#else
    r = diagU[i];
#endif
    if (r > 0) {
      pos++;
    } else if (r < 0) {
      neg++;
    } else zero++;
  }

  PetscCall(PetscFree(diagU));
  if (nneg) *nneg = neg;
  if (nzero) *nzero = zero;
  if (npos) *npos = pos;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
  Mat                Aloc;
  const PetscScalar *av;
  const PetscInt    *ai = NULL, *aj = NULL;
  PetscInt           nz, dummy;
  int                sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
  SuperLUStat_t      stat;
  PetscReal         *berr = NULL;
  PetscBool          ismpiaij, isseqaij, flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
  if (ismpiaij) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
  } else if (isseqaij) {
    PetscCall(PetscObjectReference((PetscObject)A));
    Aloc = A;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);

  PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
  PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
  nz = ai[Aloc->rmap->n];

  /* Allocations for A_sup */
  if (lu->options.Fact == DOFACT) { /* first numeric factorization */
    PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
  } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
    if (lu->FactPattern == SamePattern_SameRowPerm) {
      lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
    } else if (lu->FactPattern == SamePattern) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        if (lu->grid3d.zscp.Iam == 0) {
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
          PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
        } else {
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
        }
      } else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = SamePattern;
    } else if (lu->FactPattern == DOFACT) {
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = DOFACT;
      PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
  }

  /* Copy AIJ matrix to superlu_dist matrix */
  PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
  PetscCall(PetscArraycpy(lu->col, aj, nz));
  PetscCall(PetscArraycpy(lu->val, av, nz));
  PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
  PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
  PetscCall(MatDestroy(&Aloc));

  /* Create and setup A_sup */
  if (lu->options.Fact == DOFACT) {
    PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
  }

  /* Factor the matrix; passing a NULL right-hand side and nrhs = 0 makes p*gssvx()/p*gssvx3d() factor only, without a solve */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) {
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  } else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  if (sinfo > 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
    if (sinfo <= lu->A_sup.ncol) {
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i = %d\n", sinfo));
    } else if (sinfo > lu->A_sup.ncol) {
      /*
       sinfo = (number of bytes allocated when the memory allocation
       failure occurred) + A->ncol
       */
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation failed: %d\n", sinfo));
    }
  } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);

  if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ }
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  F->assembled     = PETSC_TRUE;
  F->preallocated  = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note the Petsc r and c permutations are ignored */
static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
  PetscInt           M = A->rmap->N, N = A->cmap->N, indx;
  PetscMPIInt        size, mpiflg;
  PetscBool          flg, set;
  const char        *colperm[]     = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
  const char        *rowperm[]     = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
  const char        *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
  MPI_Comm           comm;
  PetscSuperLU_DIST *context = NULL;

  PetscFunctionBegin;
  /* Set options to F */
  PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
  PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
  if (set && !flg) lu->options.Equil = NO;

  PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->options.RowPerm = NOROWPERM;
      break;
    case 1:
      lu->options.RowPerm = LargeDiag_MC64;
      break;
    case 2:
      lu->options.RowPerm = LargeDiag_AWPM;
      break;
    case 3:
      lu->options.RowPerm = MY_PERMR;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
    }
  }

  PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->options.ColPerm = NATURAL;
      break;
    case 1:
      lu->options.ColPerm = MMD_AT_PLUS_A;
      break;
    case 2:
      lu->options.ColPerm = MMD_ATA;
      break;
    case 3:
      lu->options.ColPerm = METIS_AT_PLUS_A;
      break;
    case 4:
      lu->options.ColPerm = PARMETIS; /* only works for np>1 */
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
    }
  }

  lu->options.ReplaceTinyPivot = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
  if (set && flg) lu->options.ReplaceTinyPivot = YES;

  lu->options.ParSymbFact = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
  if (set && flg && size > 1) {
#if defined(PETSC_HAVE_PARMETIS)
    lu->options.ParSymbFact = YES;
    lu->options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
#else
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
#endif
  }

  lu->FactPattern = SamePattern;
  PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->FactPattern = SamePattern;
      break;
    case 1:
      lu->FactPattern = SamePattern_SameRowPerm;
      break;
    case 2:
      lu->FactPattern = DOFACT;
      break;
    }
  }

  lu->options.IterRefine = NOREFINE;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
  if (set) {
    if (flg) lu->options.IterRefine = SLU_DOUBLE;
    else lu->options.IterRefine = NOREFINE;
  }

  if (PetscLogPrintInfo) lu->options.PrintStat = YES;
  else lu->options.PrintStat = NO;
  PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
  PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));

  /* Additional options for special cases */
  if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, (void *)0));
    PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
  }
  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg));
  if (!mpiflg || context->busy) { /* additional options */
    if (!mpiflg) {
      PetscCall(PetscNew(&context));
      context->busy = PETSC_TRUE;
      PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
      PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
    } else {
      PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
    }

    /* Default number of process columns and rows */
    lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
    if (!lu->nprow) lu->nprow = 1;
    while (lu->nprow > 0) {
      lu->npcol = (int_t)(size / lu->nprow);
      if (size == lu->nprow * lu->npcol) break;
      lu->nprow--;
    }
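    /* The loop above picks the most nearly square nprow x npcol factorization of size; e.g.,
       size = 12 starts with nprow = 3 and finds npcol = 4 (3 * 4 == 12), giving a 3 x 4 grid */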
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    lu->use3d = PETSC_FALSE;
    lu->npdep = 1;
#endif

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
    PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation");
    if (lu->use3d) {
      PetscInt t;
      PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
      t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
      PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
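      /* The 2d grid below is computed per z-layer of the 3d grid; e.g., 8 ranks with
         -mat_superlu_dist_d 2 leave size/npdep = 4 ranks per layer, giving a 2 x 2 x 2 grid */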
      if (lu->npdep > 1) {
        lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
        if (!lu->nprow) lu->nprow = 1;
        while (lu->nprow > 0) {
          lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
          if (size == lu->nprow * lu->npcol * lu->npdep) break;
          lu->nprow--;
        }
      }
    }
#endif
    PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number of rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
    PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number of columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
#else
    PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
#endif
    /* end of adding additional options */

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d));
      if (context) {
        context->grid3d = lu->grid3d;
        context->use3d  = lu->use3d;
      }
    } else {
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
      if (context) context->grid = lu->grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    }
#endif
    PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
    if (mpiflg) {
      PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute\n"));
    } else {
      PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
    }
  } else { /* (mpiflg && !context->busy) */
    PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute\n"));
    context->busy = PETSC_TRUE;
    lu->grid      = context->grid;
  }
  PetscOptionsEnd();

  /* Initialize ScalePermstruct and LUstruct. */
  PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
  PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
  F->ops->getinertia      = NULL;

  if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
  F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  PetscFunctionReturn(PETSC_SUCCESS);
}
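
/* SuperLU_DIST does not provide a distinct Cholesky driver; the Cholesky factor type is
   implemented as an LU factorization with options.SymPattern = YES, set in
   MatGetFactor_aij_superlu_dist() */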

static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSUPERLU_DIST;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
{
  Mat_SuperLU_DIST      *lu = (Mat_SuperLU_DIST *)A->data;
  superlu_dist_options_t options;

  PetscFunctionBegin;
  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);

  options = lu->options;
  PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
  /* would love to use the SuperLU 'IFMT' macro but it is inconsistently applied; the
   * format spec for int64_t is set to %d for whatever reason */
  PetscCall(PetscViewerASCIIPrintf(viewer, "  Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
#endif

  PetscCall(PetscViewerASCIIPrintf(viewer, "  Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
  PetscCall(PetscViewerASCIIPrintf(viewer, "  Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
  PetscCall(PetscViewerASCIIPrintf(viewer, "  Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
  PetscCall(PetscViewerASCIIPrintf(viewer, "  Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));

  switch (options.RowPerm) {
  case NOROWPERM:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation NOROWPERM\n"));
    break;
  case LargeDiag_MC64:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_MC64\n"));
    break;
  case LargeDiag_AWPM:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_AWPM\n"));
    break;
  case MY_PERMR:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation MY_PERMR\n"));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
  }

  switch (options.ColPerm) {
  case NATURAL:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation NATURAL\n"));
    break;
  case MMD_AT_PLUS_A:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_AT_PLUS_A\n"));
    break;
  case MMD_ATA:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_ATA\n"));
    break;
  /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
  case METIS_AT_PLUS_A:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation METIS_AT_PLUS_A\n"));
    break;
  case PARMETIS:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation PARMETIS\n"));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
  }

  PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));

  if (lu->FactPattern == SamePattern) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern\n"));
  } else if (lu->FactPattern == SamePattern_SameRowPerm) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern_SameRowPerm\n"));
  } else if (lu->FactPattern == DOFACT) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization DOFACT\n"));
  } else {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
{
  Mat                    B;
  Mat_SuperLU_DIST      *lu;
  PetscInt               M = A->rmap->N, N = A->cmap->N;
  PetscMPIInt            size;
  superlu_dist_options_t options;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
  PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));
  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_SuperLU_DIST;
  B->ops->destroy = MatDestroy_SuperLU_DIST;

  /* Set the default input options:
     options.Fact              = DOFACT;
     options.Equil             = YES;
     options.ParSymbFact       = NO;
     options.ColPerm           = METIS_AT_PLUS_A;
     options.RowPerm           = LargeDiag_MC64;
     options.ReplaceTinyPivot  = YES;
     options.IterRefine        = SLU_DOUBLE;
     options.Trans             = NOTRANS;
     options.SolveInitialized  = NO; -- holds the communication pattern used by MatSolve() and MatMatSolve()
     options.RefineInitialized = NO;
     options.PrintStat         = YES;
     options.SymPattern        = NO;
  */
  set_default_options_dist(&options);

  B->trivialsymbolic = PETSC_TRUE;
  if (ftype == MAT_FACTOR_LU) {
    B->factortype            = MAT_FACTOR_LU;
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  } else {
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
    options.SymPattern             = YES;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));

  PetscCall(PetscNew(&lu));
  B->data = lu;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));

  lu->options              = options;
  lu->options.Fact         = DOFACT;
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));

  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
  PetscFunctionReturn(PETSC_SUCCESS);
}
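
/*
   A sketch of selecting this solver programmatically through KSP/PC (illustrative; assumes
   ksp is a KSP created on a parallel AIJ linear system):

     PC pc;
     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST));
*/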

/*MC
  MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization

  Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST

  Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver

  Works with `MATAIJ` matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
. -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d` is provided)
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
. -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_printstat - print factorization information
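
  For example, to solve with this package on a 2 x 2 process grid (a sketch; `ex2` stands in for any PETSc program that solves a linear system with a `KSP`):
.vb
  mpiexec -n 4 ./ex2 -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_r 2 -mat_superlu_dist_c 2
.ve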

  Level: beginner

  Note:
    If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs.

.seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
M*/