xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 /*
2         Provides an interface to the SuperLU_DIST sparse solver
3 */
4 
5 #include <../src/mat/impls/aij/seq/aij.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 #include <petscpkg_version.h>
8 
9 EXTERN_C_BEGIN
10 #if defined(PETSC_USE_COMPLEX)
11 #define CASTDOUBLECOMPLEX     (doublecomplex *)
12 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
13 #include <superlu_zdefs.h>
14 #define LUstructInit                  zLUstructInit
15 #define ScalePermstructInit           zScalePermstructInit
16 #define ScalePermstructFree           zScalePermstructFree
17 #define LUstructFree                  zLUstructFree
18 #define Destroy_LU                    zDestroy_LU
19 #define ScalePermstruct_t             zScalePermstruct_t
20 #define LUstruct_t                    zLUstruct_t
21 #define SOLVEstruct_t                 zSOLVEstruct_t
22 #define SolveFinalize                 zSolveFinalize
23 #define pGetDiagU                     pzGetDiagU
24 #define pgssvx                        pzgssvx
25 #define allocateA_dist                zallocateA_dist
26 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
27 #define SLU                           SLU_Z
28 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
29 #define DeAllocLlu_3d              zDeAllocLlu_3d
30 #define DeAllocGlu_3d              zDeAllocGlu_3d
31 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
32 #define pgssvx3d                   pzgssvx3d
33 #endif
34 #elif defined(PETSC_USE_REAL_SINGLE)
35 #define CASTDOUBLECOMPLEX
36 #define CASTDOUBLECOMPLEXSTAR
37 #include <superlu_sdefs.h>
38 #define LUstructInit                  sLUstructInit
39 #define ScalePermstructInit           sScalePermstructInit
40 #define ScalePermstructFree           sScalePermstructFree
41 #define LUstructFree                  sLUstructFree
42 #define Destroy_LU                    sDestroy_LU
43 #define ScalePermstruct_t             sScalePermstruct_t
44 #define LUstruct_t                    sLUstruct_t
45 #define SOLVEstruct_t                 sSOLVEstruct_t
46 #define SolveFinalize                 sSolveFinalize
47 #define pGetDiagU                     psGetDiagU
48 #define pgssvx                        psgssvx
49 #define allocateA_dist                sallocateA_dist
50 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
51 #define SLU                           SLU_S
52 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
53 #define DeAllocLlu_3d              sDeAllocLlu_3d
54 #define DeAllocGlu_3d              sDeAllocGlu_3d
55 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
56 #define pgssvx3d                   psgssvx3d
57 #endif
58 #else
59 #define CASTDOUBLECOMPLEX
60 #define CASTDOUBLECOMPLEXSTAR
61 #include <superlu_ddefs.h>
62 #define LUstructInit                  dLUstructInit
63 #define ScalePermstructInit           dScalePermstructInit
64 #define ScalePermstructFree           dScalePermstructFree
65 #define LUstructFree                  dLUstructFree
66 #define Destroy_LU                    dDestroy_LU
67 #define ScalePermstruct_t             dScalePermstruct_t
68 #define LUstruct_t                    dLUstruct_t
69 #define SOLVEstruct_t                 dSOLVEstruct_t
70 #define SolveFinalize                 dSolveFinalize
71 #define pGetDiagU                     pdGetDiagU
72 #define pgssvx                        pdgssvx
73 #define allocateA_dist                dallocateA_dist
74 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
75 #define SLU                           SLU_D
76 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
77 #define DeAllocLlu_3d              dDeAllocLlu_3d
78 #define DeAllocGlu_3d              dDeAllocGlu_3d
79 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
80 #define pgssvx3d                   pdgssvx3d
81 #endif
82 #endif
83 EXTERN_C_END
84 
/* Per-factor context for the SuperLU_DIST interface; stored in the factor matrix's ->data */
typedef struct {
  int_t      nprow, npcol, *row, *col; /* process-grid dimensions; row/col are the index arrays handed to SuperLU_DIST */
  gridinfo_t grid;                     /* 2D SuperLU_DIST process grid */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool    use3d; /* use the 3D algorithm (pgssvx3d) instead of the 2D driver */
  int_t        npdep; /* replication factor, must be power of two */
  gridinfo3d_t grid3d; /* 3D process grid; valid only when use3d is true */
#endif
  superlu_dist_options_t options;         /* SuperLU_DIST solver options (Fact, RowPerm, ColPerm, ...) */
  SuperMatrix            A_sup;           /* the matrix in SuperLU_DIST SLU_NR_loc (distributed compressed-row) format */
  ScalePermstruct_t      ScalePermstruct; /* scaling and permutation data produced by factorization */
  LUstruct_t             LUstruct;        /* distributed L and U factors */
  int                    StatPrint;       /* NOTE(review): appears unused in this file; PrintStat in options is used instead -- confirm */
  SOLVEstruct_t          SOLVEstruct;     /* solve communication pattern, rebuilt when switching between MatSolve and MatMatSolve */
  fact_t                 FactPattern;     /* reuse policy for repeated factorizations: SamePattern, SamePattern_SameRowPerm, or DOFACT */
  MPI_Comm               comm_superlu;    /* private communicator obtained via PetscCommGetComm() when the cached keyval grid is busy; NULL otherwise */
  PetscScalar           *val;             /* matrix values in SuperLU_DIST storage, allocated by allocateA_dist() */
  PetscBool              matsolve_iscalled, matmatsolve_iscalled; /* which solve path ran last, so SOLVEstruct is finalized before switching */
  PetscBool              CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
} Mat_SuperLU_DIST;
105 
106 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) {
107   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
108 
109   PetscFunctionBegin;
110   PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
111   PetscFunctionReturn(0);
112 }
113 
/*
  MatSuperluDistGetDiagU - public entry point returning the diagonal of the U factor of a
  SuperLU_DIST factored matrix.

  Input:  F     - a factored matrix
  Output: diagU - caller-provided array of global length F->rmap->N

  Dispatches through the composed "MatSuperluDistGetDiagU_C" method; PetscTryMethod() makes
  this a silent no-op when F is not a SuperLU_DIST factor (no method composed).
*/
PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
  PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
  PetscFunctionReturn(0);
}
120 
121 /*  This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
typedef struct {
  MPI_Comm   comm; /* duplicated communicator owning the cached grid; freed in the keyval delete callback */
  PetscBool  busy; /* PETSC_TRUE while a factor matrix is currently using this cached grid */
  gridinfo_t grid; /* cached 2D SuperLU_DIST grid, reused across successive factorizations */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool    use3d;  /* the cached grid is the 3D variant */
  gridinfo3d_t grid3d; /* cached 3D grid; valid only when use3d is true */
#endif
} PetscSuperLU_DIST;
/* MPI attribute key under which a PetscSuperLU_DIST context is cached on a communicator;
   created lazily in MatLUFactorSymbolic_SuperLU_DIST() and freed at PetscFinalize() */
static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
132 
/*
  Petsc_Superlu_dist_keyval_Delete_Fn - MPI attribute delete callback, invoked by MPI when the
  communicator carrying the cached PetscSuperLU_DIST context is freed. Tears down the cached
  SuperLU_DIST process grid (3D or 2D), frees the duplicated communicator, and frees the
  context allocation itself. Must return an MPI error code, hence SETERRMPI/MPI_SUCCESS.
*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) {
  PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;

  PetscFunctionBegin;
  /* sanity check: the attribute must have been stored under our own keyval */
  if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
  PetscCall(PetscInfo(NULL, "Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n"));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  if (context->use3d) {
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
  } else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
  PetscCallMPI(MPI_Comm_free(&context->comm));
  PetscCall(PetscFree(context));
  PetscFunctionReturn(MPI_SUCCESS);
}
149 
150 /*
151    Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
152    users who do not destroy all PETSc objects before PetscFinalize().
153 
154    The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
155    can still check that the keyval associated with the MPI communicator is correct when the MPI
156    communicator is destroyed.
157 
158    This is called in PetscFinalize()
159 */
160 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) {
161   PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
162 
163   PetscFunctionBegin;
164   PetscCall(PetscInfo(NULL, "Freeing Petsc_Superlu_dist_keyval\n"));
165   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
166   PetscFunctionReturn(0);
167 }
168 
/*
  MatDestroy_SuperLU_DIST - destructor for a SuperLU_DIST factor matrix. Frees the SuperLU_DIST
  factor storage when a factorization was actually set up (CleanUpSuperLU_Dist), then either
  returns the private communicator or marks the cached keyval grid as no longer busy, and
  finally clears the composed methods.
*/
static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) {
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
    if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      /* in the 3D algorithm only the zscp.Iam == 0 layer owns the full LU factors; the other
         layers free their replicated pieces with the DeAlloc*_3d routines */
      if (lu->grid3d.zscp.Iam == 0) {
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
      } else {
        PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
        PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
      }
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
    } else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));

    /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */
    if (lu->comm_superlu) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
      } else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
    }
  }
  /*
   * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
   * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
   * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist
   * is off, and the communicator was not released or marked as "not busy " in the old code.
   * Here we try to release comm regardless.
  */
  if (lu->comm_superlu) {
    PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
  } else {
    /* grid lives in the communicator attribute: just mark it reusable for the next factor */
    PetscSuperLU_DIST *context;
    MPI_Comm           comm;
    PetscMPIInt        flg;

    PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
    PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Communicator does not have expected Petsc_Superlu_dist_keyval attribute");
    context->busy = PETSC_FALSE;
  }

  PetscCall(PetscFree(A->data));
  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));

  PetscFunctionReturn(0);
}
229 
/*
  MatSolve_SuperLU_DIST - solves A x = b using a previously computed SuperLU_DIST factorization.
  The right-hand side is copied into x and the p*gssvx driver overwrites that array in place
  with the solution.
*/
static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) {
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt          m  = A->rmap->n; /* local number of rows = local length of the solution array */
  SuperLUStat_t     stat;
  PetscReal         berr[1]; /* backward error reported by the driver; one entry for the single RHS */
  PetscScalar      *bptr = NULL;
  int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool  cite = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscCitationsRegister("@article{lidemmel03,\n  author = {Xiaoye S. Li and James W. Demmel},\n  title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n           Solver for Unsymmetric Linear Systems},\n  journal = {ACM "
                                   "Trans. Mathematical Software},\n  volume = {29},\n  number = {2},\n  pages = {110-140},\n  year = 2003\n}\n",
                                   &cite));

  if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
    /* see comments in MatMatSolve() */
    PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  PetscCall(VecCopy(b_mpi, x));
  PetscCall(VecGetArray(x, &bptr));

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
  /* the 3D driver is only used when getline() exists; mirrors the check in MatLUFactorSymbolic_SuperLU_DIST() */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));

  PetscCall(VecRestoreArray(x, &bptr));
  /* record which solve path ran so a later MatMatSolve() rebuilds SOLVEstruct */
  lu->matsolve_iscalled    = PETSC_TRUE;
  lu->matmatsolve_iscalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
269 
/*
  MatMatSolve_SuperLU_DIST - solves A X = B for multiple right-hand sides stored in a dense
  matrix. B is copied into X (unless they alias) and the p*gssvx driver overwrites X's array
  in place, column by column (nrhs columns).
*/
static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) {
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt          m  = A->rmap->n, nrhs; /* local rows and number of right-hand sides */
  SuperLUStat_t     stat;
  PetscReal         berr[1];
  PetscScalar      *bptr;
  int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool         flg;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
  if (X != B_mpi) {
    PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
  }

  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
       thus destroy it and create a new SOLVEstruct.
       Otherwise it may result in memory corruption or incorrect solution
       See src/mat/tests/ex125.c */
    PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));

  PetscCall(MatGetSize(B_mpi, NULL, &nrhs));

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
  PetscCall(MatDenseGetArray(X, &bptr));

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));

  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
  PetscCall(MatDenseRestoreArray(X, &bptr));

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  /* record which solve path ran so a later MatSolve() rebuilds SOLVEstruct */
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
318 
319 /*
320   input:
321    F:        numeric Cholesky factor
322   output:
323    nneg:     total number of negative pivots
324    nzero:    total number of zero pivots
325    npos:     (global dimension of F) - nneg - nzero
326 */
327 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) {
328   Mat_SuperLU_DIST *lu    = (Mat_SuperLU_DIST *)F->data;
329   PetscScalar      *diagU = NULL;
330   PetscInt          M, i, neg = 0, zero = 0, pos = 0;
331   PetscReal         r;
332 
333   PetscFunctionBegin;
334   PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
335   PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
336   PetscCall(MatGetSize(F, &M, NULL));
337   PetscCall(PetscMalloc1(M, &diagU));
338   PetscCall(MatSuperluDistGetDiagU(F, diagU));
339   for (i = 0; i < M; i++) {
340 #if defined(PETSC_USE_COMPLEX)
341     r = PetscImaginaryPart(diagU[i]) / 10.0;
342     PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
343     r = PetscRealPart(diagU[i]);
344 #else
345     r = diagU[i];
346 #endif
347     if (r > 0) {
348       pos++;
349     } else if (r < 0) {
350       neg++;
351     } else zero++;
352   }
353 
354   PetscCall(PetscFree(diagU));
355   if (nneg) *nneg = neg;
356   if (nzero) *nzero = zero;
357   if (npos) *npos = pos;
358   PetscFunctionReturn(0);
359 }
360 
/*
  MatLUFactorNumeric_SuperLU_DIST - numeric LU factorization. Extracts the local rows of A in
  CSR form, copies them into SuperLU_DIST's storage (allocating it on the first factorization,
  or reusing/freeing pieces of it according to lu->FactPattern on refactorizations), then calls
  the p*gssvx driver with nrhs = 0 to factor without solving.
*/
static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) {
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
  Mat                Aloc;
  const PetscScalar *av;
  const PetscInt    *ai = NULL, *aj = NULL;
  PetscInt           nz, dummy;
  int                sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
  SuperLUStat_t      stat;
  PetscReal         *berr = 0;
  PetscBool          ismpiaij, isseqaij, flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
  if (ismpiaij) {
    /* merge the diagonal and off-diagonal parts into one sequential AIJ matrix of the local rows */
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
  } else if (isseqaij) {
    PetscCall(PetscObjectReference((PetscObject)A));
    Aloc = A;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);

  PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
  PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
  nz = ai[Aloc->rmap->n]; /* total number of local nonzeros */

  /* Allocations for A_sup */
  if (lu->options.Fact == DOFACT) { /* first numeric factorization */
    PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
  } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
    if (lu->FactPattern == SamePattern_SameRowPerm) {
      lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
    } else if (lu->FactPattern == SamePattern) {
      /* keep the symbolic information but free the old numeric factors before refactoring */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        if (lu->grid3d.zscp.Iam == 0) {
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
          PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
        } else {
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
        }
      } else
#endif
        /* NOTE(review): this and the DOFACT branch pass A->rmap->N where the 3D branch and
           MatDestroy pass A->cmap->N; identical for the square matrices factored here -- confirm */
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = SamePattern;
    } else if (lu->FactPattern == DOFACT) {
      /* start completely from scratch: free everything and reallocate */
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = DOFACT;
      PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
  }

  /* Copy AIJ matrix to superlu_dist matrix */
  PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
  PetscCall(PetscArraycpy(lu->col, aj, nz));
  PetscCall(PetscArraycpy(lu->val, av, nz));
  PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
  PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
  PetscCall(MatDestroy(&Aloc));

  /* Create and setup A_sup */
  if (lu->options.Fact == DOFACT) {
    PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
  }

  /* Factor the matrix. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) {
    /* nrhs = 0 and a NULL RHS pointer: factor only, no solve */
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  } else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  if (sinfo > 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
    /* sinfo <= ncol encodes a zero pivot at column sinfo; sinfo > ncol encodes an out-of-memory
       condition (see comment below); record the failure on F instead of erroring */
    if (sinfo <= lu->A_sup.ncol) {
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
    } else if (sinfo > lu->A_sup.ncol) {
      /*
       number of bytes allocated when memory allocation
       failure occurred, plus A->ncol.
       */
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
    }
  } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);

  if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ }
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  F->assembled     = PETSC_TRUE;
  F->preallocated  = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(0);
}
459 
460 /* Note the Petsc r and c permutations are ignored */
/*
  MatLUFactorSymbolic_SuperLU_DIST - "symbolic" phase for the SuperLU_DIST interface. SuperLU_DIST
  performs the real symbolic analysis inside its numeric driver, so this routine only processes
  the -mat_superlu_dist_* options, sets up (or reuses) the process grid, initializes
  ScalePermstruct/LUstruct, and installs the numeric/solve function pointers on F.
  Note the Petsc r and c permutations are ignored.
*/
static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
  PetscInt           M = A->rmap->N, N = A->cmap->N, indx;
  PetscMPIInt        size, mpiflg;
  PetscBool          flg, set;
  const char        *colperm[]     = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
  const char        *rowperm[]     = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
  const char        *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
  MPI_Comm           comm;
  PetscSuperLU_DIST *context = NULL;

  PetscFunctionBegin;
  /* Set options to F */
  PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
  PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
  if (set && !flg) lu->options.Equil = NO;

  /* default row permutation is rowperm[1] = LargeDiag_MC64 */
  PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0: lu->options.RowPerm = NOROWPERM; break;
    case 1: lu->options.RowPerm = LargeDiag_MC64; break;
    case 2: lu->options.RowPerm = LargeDiag_AWPM; break;
    case 3: lu->options.RowPerm = MY_PERMR; break;
    default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
    }
  }

  /* default column permutation is colperm[3] = METIS_AT_PLUS_A */
  PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0: lu->options.ColPerm = NATURAL; break;
    case 1: lu->options.ColPerm = MMD_AT_PLUS_A; break;
    case 2: lu->options.ColPerm = MMD_ATA; break;
    case 3: lu->options.ColPerm = METIS_AT_PLUS_A; break;
    case 4:
      lu->options.ColPerm = PARMETIS; /* only works for np>1 */
      break;
    default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
    }
  }

  lu->options.ReplaceTinyPivot = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
  if (set && flg) lu->options.ReplaceTinyPivot = YES;

  lu->options.ParSymbFact = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
  if (set && flg && size > 1) {
#if defined(PETSC_HAVE_PARMETIS)
    lu->options.ParSymbFact = YES;
    lu->options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
#else
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
#endif
  }

  lu->FactPattern = SamePattern;
  PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0: lu->FactPattern = SamePattern; break;
    case 1: lu->FactPattern = SamePattern_SameRowPerm; break;
    case 2: lu->FactPattern = DOFACT; break;
    }
  }

  lu->options.IterRefine = NOREFINE;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
  if (set) {
    if (flg) lu->options.IterRefine = SLU_DOUBLE;
    else lu->options.IterRefine = NOREFINE;
  }

  /* statistics printing follows -info unless overridden */
  if (PetscLogPrintInfo) lu->options.PrintStat = YES;
  else lu->options.PrintStat = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_statprint", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));

  /* Additional options for special cases */
  /* lazily create the MPI keyval used to cache one grid per communicator */
  if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, (void *)0));
    PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
  }
  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg));
  if (!mpiflg || context->busy) { /* additional options */
    /* no cached grid, or the cached one is in use: build a new grid */
    if (!mpiflg) {
      /* first SuperLU_DIST factor on this communicator: create and attach a cache context */
      PetscCall(PetscNew(&context));
      context->busy = PETSC_TRUE;
      PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
      PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
    } else {
      /* cached grid busy: this factor gets its own private communicator instead */
      PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
    }

    /* Default number of process columns and rows: nearly-square factorization of size */
    lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
    if (!lu->nprow) lu->nprow = 1;
    while (lu->nprow > 0) {
      lu->npcol = (int_t)(size / lu->nprow);
      if (size == lu->nprow * lu->npcol) break;
      lu->nprow--;
    }
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    lu->use3d = PETSC_FALSE;
    lu->npdep = 1;
#endif

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
    PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation");
    if (lu->use3d) {
      PetscInt t;
      PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
      /* npdep must be a power of two: compare 2^floor(log2(npdep)) against npdep */
      t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
      PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
      if (lu->npdep > 1) {
        /* recompute nprow x npcol so that nprow * npcol * npdep == size */
        lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
        if (!lu->nprow) lu->nprow = 1;
        while (lu->nprow > 0) {
          lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
          if (size == lu->nprow * lu->npcol * lu->npdep) break;
          lu->nprow--;
        }
      }
    }
#endif
    PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
    PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
#else
    PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
#endif
    /* end of adding additional options */

    /* initialize the grid on the cached (context->comm) or private (lu->comm_superlu) communicator */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d));
      if (context) {
        context->grid3d = lu->grid3d;
        context->use3d  = lu->use3d;
      }
    } else {
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
      if (context) context->grid = lu->grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    }
#endif
    PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
    if (mpiflg) {
      PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n"));
    } else {
      PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
    }
  } else { /* (mpiflg && !context->busy) */
    /* reuse the cached grid and mark it as taken */
    PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute."));
    context->busy = PETSC_TRUE;
    lu->grid      = context->grid;
  }
  PetscOptionsEnd();

  /* Initialize ScalePermstruct and LUstruct. */
  PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
  PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
  F->ops->getinertia      = NULL;

  /* inertia only makes sense for symmetric/Hermitian matrices */
  if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  PetscFunctionReturn(0);
}
638 
/*
  Symbolic Cholesky factorization for SuperLU_DIST.

  SuperLU_DIST has no dedicated Cholesky kernel, so this delegates to the LU
  symbolic phase, passing the row permutation IS for both rows and columns
  (options.SymPattern was set to YES in MatGetFactor_aij_superlu_dist so the
  symmetric sparsity pattern is exploited), then points the numeric Cholesky
  hook at the LU numeric routine.
*/
static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) {
  PetscFunctionBegin;
  PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
  F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  PetscFunctionReturn(0);
}
645 
/* Report the solver package name for this factored matrix (query hook composed
   on the factor matrix as "MatFactorGetSolverType_C"). */
static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) {
  PetscFunctionBegin;
  *type = MATSOLVERSUPERLU_DIST;
  PetscFunctionReturn(0);
}
651 
652 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) {
653   Mat_SuperLU_DIST      *lu = (Mat_SuperLU_DIST *)A->data;
654   superlu_dist_options_t options;
655 
656   PetscFunctionBegin;
657   /* check if matrix is superlu_dist type */
658   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
659 
660   options = lu->options;
661   PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
662   /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
663    * format spec for int64_t is set to %d for whatever reason */
664   PetscCall(PetscViewerASCIIPrintf(viewer, "  Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
665 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
666   if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
667 #endif
668 
669   PetscCall(PetscViewerASCIIPrintf(viewer, "  Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
670   PetscCall(PetscViewerASCIIPrintf(viewer, "  Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
671   PetscCall(PetscViewerASCIIPrintf(viewer, "  Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
672   PetscCall(PetscViewerASCIIPrintf(viewer, "  Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));
673 
674   switch (options.RowPerm) {
675   case NOROWPERM: PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation NOROWPERM\n")); break;
676   case LargeDiag_MC64: PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_MC64\n")); break;
677   case LargeDiag_AWPM: PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_AWPM\n")); break;
678   case MY_PERMR: PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation MY_PERMR\n")); break;
679   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
680   }
681 
682   switch (options.ColPerm) {
683   case NATURAL: PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation NATURAL\n")); break;
684   case MMD_AT_PLUS_A: PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_AT_PLUS_A\n")); break;
685   case MMD_ATA: PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_ATA\n")); break;
686   /*  Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
687   case METIS_AT_PLUS_A: PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation METIS_AT_PLUS_A\n")); break;
688   case PARMETIS: PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation PARMETIS\n")); break;
689   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
690   }
691 
692   PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));
693 
694   if (lu->FactPattern == SamePattern) {
695     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern\n"));
696   } else if (lu->FactPattern == SamePattern_SameRowPerm) {
697     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern_SameRowPerm\n"));
698   } else if (lu->FactPattern == DOFACT) {
699     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization DOFACT\n"));
700   } else {
701     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
702   }
703   PetscFunctionReturn(0);
704 }
705 
706 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) {
707   PetscBool         iascii;
708   PetscViewerFormat format;
709 
710   PetscFunctionBegin;
711   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
712   if (iascii) {
713     PetscCall(PetscViewerGetFormat(viewer, &format));
714     if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
715   }
716   PetscFunctionReturn(0);
717 }
718 
719 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) {
720   Mat                    B;
721   Mat_SuperLU_DIST      *lu;
722   PetscInt               M = A->rmap->N, N = A->cmap->N;
723   PetscMPIInt            size;
724   superlu_dist_options_t options;
725 
726   PetscFunctionBegin;
727   /* Create the factorization matrix */
728   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
729   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
730   PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
731   PetscCall(MatSetUp(B));
732   B->ops->getinfo = MatGetInfo_External;
733   B->ops->view    = MatView_SuperLU_DIST;
734   B->ops->destroy = MatDestroy_SuperLU_DIST;
735 
736   /* Set the default input options:
737      options.Fact              = DOFACT;
738      options.Equil             = YES;
739      options.ParSymbFact       = NO;
740      options.ColPerm           = METIS_AT_PLUS_A;
741      options.RowPerm           = LargeDiag_MC64;
742      options.ReplaceTinyPivot  = YES;
743      options.IterRefine        = DOUBLE;
744      options.Trans             = NOTRANS;
745      options.SolveInitialized  = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
746      options.RefineInitialized = NO;
747      options.PrintStat         = YES;
748      options.SymPattern        = NO;
749   */
750   set_default_options_dist(&options);
751 
752   B->trivialsymbolic = PETSC_TRUE;
753   if (ftype == MAT_FACTOR_LU) {
754     B->factortype            = MAT_FACTOR_LU;
755     B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
756   } else {
757     B->factortype                  = MAT_FACTOR_CHOLESKY;
758     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
759     options.SymPattern             = YES;
760   }
761 
762   /* set solvertype */
763   PetscCall(PetscFree(B->solvertype));
764   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));
765 
766   PetscCall(PetscNewLog(B, &lu));
767   B->data = lu;
768   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
769 
770   lu->options              = options;
771   lu->options.Fact         = DOFACT;
772   lu->matsolve_iscalled    = PETSC_FALSE;
773   lu->matmatsolve_iscalled = PETSC_FALSE;
774 
775   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
776   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));
777 
778   *F = B;
779   PetscFunctionReturn(0);
780 }
781 
782 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) {
783   PetscFunctionBegin;
784   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
785   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
786   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
787   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
788   PetscFunctionReturn(0);
789 }
790 
791 /*MC
792   MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
793 
794   Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch  to have PETSc installed with SuperLU_DIST
795 
796   Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver
797 
798    Works with `MATAIJ` matrices
799 
800   Options Database Keys:
801 + -mat_superlu_dist_r <n> - number of rows in processor partition
802 . -mat_superlu_dist_c <n> - number of columns in processor partition
803 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
. -mat_superlu_dist_d <n> - depth in 3d partition (valid only if -mat_superlu_dist_3d is provided)
805 . -mat_superlu_dist_equil - equilibrate the matrix
806 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
807 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
808 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
809 . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT
810 . -mat_superlu_dist_iterrefine - use iterative refinement
811 - -mat_superlu_dist_statprint - print factorization information
812 
813   Note:
    If PETSc was configured with --with-cuda then this solver will automatically use the GPUs.
815 
816   Level: beginner
817 
818 .seealso: `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
819 M*/
820