xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 93defd67788917e0fdcd608be2e0e9fa49e89652)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // ***** Overview of ZCode *******
21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index.
22 // This is known as Morton encoding, and is refered to as ZCode in this code.
23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be:
24 //    [k2,j2,i2,k1,j1,i1,k0,j0,i0]
25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`).
26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices.
27 
28 // Decodes the leading interleaved index from a ZCode
29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0]
30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal)
31 static unsigned ZCodeSplit1(ZCode z)
32 {
33   z &= 0111111111111111111111;
34   z = (z | z >> 2) & 0103030303030303030303;
35   z = (z | z >> 4) & 0100170017001700170017;
36   z = (z | z >> 8) & 0370000037700000377;
37   z = (z | z >> 16) & 0370000000000177777;
38   z = (z | z >> 32) & 07777777;
39   return (unsigned)z;
40 }
41 
42 // Encodes the leading interleaved index from a ZCode
43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0]
44 static ZCode ZEncode1(unsigned t)
45 {
46   ZCode z = t;
47   z &= 07777777;
48   z = (z | z << 32) & 0370000000000177777;
49   z = (z | z << 16) & 0370000037700000377;
50   z = (z | z << 8) & 0100170017001700170017;
51   z = (z | z << 4) & 0103030303030303030303;
52   z = (z | z << 2) & 0111111111111111111111;
53   return z;
54 }
55 
56 // Decodes i j k indices from a ZCode.
57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode
58 static Ijk ZCodeSplit(ZCode z)
59 {
60   Ijk c;
61   c.i = ZCodeSplit1(z >> 2);
62   c.j = ZCodeSplit1(z >> 1);
63   c.k = ZCodeSplit1(z >> 0);
64   return c;
65 }
66 
67 // Encodes i j k indices to a ZCode.
68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement
69 static ZCode ZEncode(Ijk c)
70 {
71   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
72   return z;
73 }
74 
75 static PetscBool IjkActive(Ijk extent, Ijk l)
76 {
77   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
78   return PETSC_FALSE;
79 }
80 
81 // If z is not the base of an octet (last 3 bits 0), return 0.
82 //
83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
85 static ZCode ZStepOct(ZCode z)
86 {
87   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
88   ZCode step = 07;
89   for (; (z & step) == 0; step = (step << 3) | 07) { }
90   return step >> 3;
91 }
92 
93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
95 {
96   PetscFunctionBegin;
97   layout->eextent.i = eextent[0];
98   layout->eextent.j = eextent[1];
99   layout->eextent.k = eextent[2];
100   layout->vextent.i = vextent[0];
101   layout->vextent.j = vextent[1];
102   layout->vextent.k = vextent[2];
103   layout->comm_size = size;
104   layout->zstarts   = NULL;
105   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
106 
107   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
108   ZCode    z           = 0;
109   layout->zstarts[0]   = 0;
110   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
111   for (PetscMPIInt r = 0; r < size; r++) {
112     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
113     for (count = 0; count < elems_needed; z++) {
114       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
115       for (ZCode s = skip;; s >>= 3) {
116         Ijk trial = ZCodeSplit(z + s);
117         if (IjkActive(layout->eextent, trial)) {
118           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
119           count += s + 1;
120           z += s;
121           break;
122         }
123         if (s == 0) { // the whole skip octet is inactive
124           z += skip;
125           break;
126         }
127       }
128     }
129     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
130     //
131     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
132     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
133     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
134     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
135     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
136     // complicate the job of identifying an owner and its offset.
137     //
138     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
139     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
140     // the issue:
141     //
142     // Consider this partition on rank 0 (left) and rank 1.
143     //
144     //    4 --------  5 -- 14 --10 -- 21 --11
145     //                |          |          |
146     // 7 -- 16 --  8  |          |          |
147     // |           |  3 -------  7 -------  9
148     // |           |             |          |
149     // 4 --------  6 ------ 10   |          |
150     // |           |         |   6 -- 16 -- 8
151     // |           |         |
152     // 3 ---11---  5 --18--  9
153     //
154     // The periodic face SF looks like
155     // [0] Number of roots=21, leaves=1, remote ranks=1
156     // [0] 16 <- (0,11)
157     // [1] Number of roots=22, leaves=2, remote ranks=2
158     // [1] 14 <- (0,18)
159     // [1] 21 <- (1,16)
160     //
161     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
162     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
163     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
164     //
165     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
166     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
167     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
168     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
169     // stranded vertices.
170     for (; z <= ZEncode(layout->vextent); z++) {
171       Ijk loc = ZCodeSplit(z);
172       if (IjkActive(layout->eextent, loc)) break;
173       z += ZStepOct(z);
174     }
175     layout->zstarts[r + 1] = z;
176   }
177   layout->zstarts[size] = ZEncode(layout->vextent);
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
182 {
183   PetscInt remote_elem = 0;
184   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
185     Ijk loc = ZCodeSplit(rz);
186     if (IjkActive(layout->eextent, loc)) remote_elem++;
187     else rz += ZStepOct(rz);
188   }
189   return remote_elem;
190 }
191 
192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
193 {
194   PetscInt lo = 0, hi = n;
195 
196   if (n == 0) return -1;
197   while (hi - lo > 1) {
198     PetscInt mid = lo + (hi - lo) / 2;
199     if (key < X[mid]) hi = mid;
200     else lo = mid;
201   }
202   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
203 }
204 
205 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
206 {
207   MPI_Comm    comm;
208   PetscInt    dim, vStart, vEnd;
209   PetscMPIInt size;
210   PetscSF     face_sfs[3];
211   PetscScalar transforms[3][4][4] = {{{0}}};
212 
213   PetscFunctionBegin;
214   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
215   PetscCallMPI(MPI_Comm_size(comm, &size));
216   PetscCall(DMGetDimension(dm, &dim));
217   const PetscInt csize = PetscPowInt(2, dim - 1);
218   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
219 
220   PetscInt num_directions = 0;
221   for (PetscInt direction = 0; direction < dim; direction++) {
222     PetscCount   num_faces;
223     PetscInt    *faces;
224     ZCode       *donor_verts, *donor_minz;
225     PetscSFNode *leaf;
226     PetscCount   num_multiroots = 0;
227     PetscInt     pStart, pEnd;
228     PetscBool    sorted;
229     PetscInt     inum_faces;
230 
231     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
232     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
233     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
234     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
235     PetscCall(PetscMalloc1(num_faces, &donor_minz));
236     PetscCall(PetscMalloc1(num_faces, &leaf));
237     for (PetscCount i = 0; i < num_faces; i++) {
238       ZCode minz = donor_verts[i * csize];
239 
240       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
241       donor_minz[i] = minz;
242     }
243     PetscCall(PetscIntCast(num_faces, &inum_faces));
244     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
245     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
246     // Checking for sorting is a cheap check that there are no duplicates.
247     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
248     for (PetscCount i = 0; i < num_faces;) {
249       ZCode       z = donor_minz[i];
250       PetscMPIInt remote_rank, remote_count = 0;
251 
252       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank));
253       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
254       // Process all the vertices on this rank
255       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
256         Ijk loc = ZCodeSplit(rz);
257 
258         if (rz == z) {
259           leaf[i].rank  = remote_rank;
260           leaf[i].index = remote_count;
261           i++;
262           if (i == num_faces) break;
263           z = donor_minz[i];
264         }
265         if (IjkActive(layout->vextent, loc)) remote_count++;
266       }
267     }
268     PetscCall(PetscFree(donor_minz));
269     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
270     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
271     const PetscInt *my_donor_degree;
272     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
273     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
274 
275     for (PetscInt i = 0; i < vEnd - vStart; i++) {
276       num_multiroots += my_donor_degree[i];
277       if (my_donor_degree[i] == 0) continue;
278       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
279     }
280     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
281     PetscCount num_my_donors;
282 
283     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
284     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
285     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
286     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
287     for (PetscCount i = 0; i < num_my_donors; i++) {
288       PetscInt f = my_donors[i];
289       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
290 
291       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
292       for (PetscInt j = 0; j < num_points; j++) {
293         PetscInt p = points[2 * j];
294         if (p < vStart || vEnd <= p) continue;
295         minv = PetscMin(minv, p);
296       }
297       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
298       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
299       my_donor_indices[minv - vStart] = f;
300     }
301     PetscCall(PetscMalloc1(num_faces, &donor_indices));
302     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
303     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
304     PetscCall(PetscFree(my_donor_indices));
305     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
306     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
307     PetscCall(PetscFree(donor_indices));
308     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
309     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
310     {
311       char face_sf_name[PETSC_MAX_PATH_LEN];
312       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
313       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
314     }
315 
316     transforms[num_directions][0][0]         = 1;
317     transforms[num_directions][1][1]         = 1;
318     transforms[num_directions][2][2]         = 1;
319     transforms[num_directions][3][3]         = 1;
320     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
321     num_directions++;
322   }
323 
324   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
325   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
326 
327   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
328   PetscFunctionReturn(PETSC_SUCCESS);
329 }
330 
331 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
332 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
333 // We use this crude approach here so we don't have to write new GPU kernels yet.
334 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
335 {
336   PetscFunctionBegin;
337   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
338   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
339     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
340     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
341   }
342   PetscFunctionReturn(PETSC_SUCCESS);
343 }
344 
345 // Creates SF to communicate data from donor to periodic faces. The data can be different sizes per donor-periodic pair and is given in `point_sizes[]`
346 PetscErrorCode CreateDonorToPeriodicSF(DM dm, PetscSF face_sf, PetscInt pStart, PetscInt pEnd, const PetscInt point_sizes[], PetscInt *rootbuffersize, PetscInt *leafbuffersize, PetscBT *rootbt, PetscSF *sf_closure)
347 {
348   MPI_Comm           comm;
349   PetscMPIInt        rank;
350   PetscInt           nroots, nleaves;
351   PetscInt          *rootdata, *leafdata;
352   const PetscInt    *filocal;
353   const PetscSFNode *firemote;
354 
355   PetscFunctionBeginUser;
356   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
357   PetscCallMPI(MPI_Comm_rank(comm, &rank));
358 
359   PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
360   PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
361   for (PetscInt i = 0; i < nleaves; i++) {
362     PetscInt point = filocal[i];
363     PetscCheck(point >= pStart && point < pEnd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in leaves exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd);
364     leafdata[point] = point_sizes[point - pStart];
365   }
366   PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
367   PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
368 
369   PetscInt root_offset = 0;
370   PetscCall(PetscBTCreate(nroots, rootbt));
371   for (PetscInt p = 0; p < nroots; p++) {
372     const PetscInt *donor_dof = rootdata + nroots;
373     if (donor_dof[p] == 0) {
374       rootdata[2 * p]     = -1;
375       rootdata[2 * p + 1] = -1;
376       continue;
377     }
378     PetscCall(PetscBTSet(*rootbt, p));
379     PetscCheck(p >= pStart && p < pEnd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in roots exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", p, pStart, pEnd);
380     PetscInt p_size = point_sizes[p - pStart];
381     PetscCheck(donor_dof[p] == p_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf data size (%" PetscInt_FMT ") does not match root data size (%" PetscInt_FMT ")", donor_dof[p], p_size);
382     rootdata[2 * p]     = root_offset;
383     rootdata[2 * p + 1] = p_size;
384     root_offset += p_size;
385   }
386   PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
387   PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
388   // Count how many leaves we need to communicate the closures
389   PetscInt leaf_offset = 0;
390   for (PetscInt i = 0; i < nleaves; i++) {
391     PetscInt point = filocal[i];
392     if (leafdata[2 * point + 1] < 0) continue;
393     leaf_offset += leafdata[2 * point + 1];
394   }
395 
396   PetscSFNode *closure_leaf;
397   PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
398   leaf_offset = 0;
399   for (PetscInt i = 0; i < nleaves; i++) {
400     PetscInt point   = filocal[i];
401     PetscInt cl_size = leafdata[2 * point + 1];
402     if (cl_size < 0) continue;
403     for (PetscInt j = 0; j < cl_size; j++) {
404       closure_leaf[leaf_offset].rank  = firemote[i].rank;
405       closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
406       leaf_offset++;
407     }
408   }
409 
410   PetscCall(PetscSFCreate(comm, sf_closure));
411   PetscCall(PetscSFSetGraph(*sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
412   *rootbuffersize = root_offset;
413   *leafbuffersize = leaf_offset;
414   PetscCall(PetscFree2(rootdata, leafdata));
415   PetscFunctionReturn(PETSC_SUCCESS);
416 }
417 
418 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
419 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
420 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
421 //
422 // Output Arguments:
423 //
424 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
425 //   can be used to create a global section and section SF.
426 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
427 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
428 //
429 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
430 {
431   MPI_Comm           comm;
432   PetscMPIInt        rank;
433   PetscSF            point_sf;
434   PetscInt           nroots, nleaves;
435   const PetscInt    *filocal;
436   const PetscSFNode *firemote;
437 
438   PetscFunctionBegin;
439   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
440   PetscCallMPI(MPI_Comm_rank(comm, &rank));
441   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
442   PetscCall(PetscMalloc1(num_face_sfs, is_points));
443 
444   for (PetscInt f = 0; f < num_face_sfs; f++) {
445     PetscSF   face_sf = face_sfs[f];
446     PetscInt *cl_sizes;
447     PetscInt  fStart, fEnd, rootbuffersize, leafbuffersize;
448     PetscSF   sf_closure;
449     PetscBT   rootbt;
450 
451     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
452     PetscCall(PetscMalloc1(fEnd - fStart, &cl_sizes));
453     for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) {
454       PetscInt cl_size, *closure = NULL;
455       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
456       cl_sizes[index] = cl_size - 1;
457       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
458     }
459 
460     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, cl_sizes, &rootbuffersize, &leafbuffersize, &rootbt, &sf_closure));
461     PetscCall(PetscFree(cl_sizes));
462     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
463 
464     PetscSFNode *leaf_donor_closure;
465     { // Pack root buffer with owner for every point in the root cones
466       PetscSFNode       *donor_closure;
467       const PetscInt    *pilocal;
468       const PetscSFNode *piremote;
469       PetscInt           npoints;
470 
471       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
472       PetscCall(PetscCalloc1(rootbuffersize, &donor_closure));
473       for (PetscInt p = 0, root_offset = 0; p < nroots; p++) {
474         if (!PetscBTLookup(rootbt, p)) continue;
475         PetscInt cl_size, *closure = NULL;
476         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
477         for (PetscInt j = 1; j < cl_size; j++) {
478           PetscInt c = closure[2 * j];
479           if (pilocal) {
480             PetscInt found = -1;
481             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
482             if (found >= 0) {
483               donor_closure[root_offset++] = piremote[found];
484               continue;
485             }
486           }
487           // we own c
488           donor_closure[root_offset].rank  = rank;
489           donor_closure[root_offset].index = c;
490           root_offset++;
491         }
492         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
493       }
494 
495       PetscCall(PetscMalloc1(leafbuffersize, &leaf_donor_closure));
496       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
497       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
498       PetscCall(PetscSFDestroy(&sf_closure));
499       PetscCall(PetscFree(donor_closure));
500     }
501 
502     PetscSFNode *new_iremote;
503     PetscCall(PetscCalloc1(nroots, &new_iremote));
504     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
505     // Walk leaves and match vertices
506     for (PetscInt i = 0, leaf_offset = 0; i < nleaves; i++) {
507       PetscInt  point   = filocal[i], cl_size;
508       PetscInt *closure = NULL;
509       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
510       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
511         PetscInt    c  = closure[2 * j];
512         PetscSFNode lc = leaf_donor_closure[leaf_offset];
513         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
514         if (new_iremote[c].rank == -1) {
515           new_iremote[c] = lc;
516         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
517         leaf_offset++;
518       }
519       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
520     }
521     PetscCall(PetscFree(leaf_donor_closure));
522 
523     // Include face points in closure SF
524     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
525     // consolidate leaves
526     PetscInt *leafdata;
527     PetscCall(PetscMalloc1(nroots, &leafdata));
528     PetscInt num_new_leaves = 0;
529     for (PetscInt i = 0; i < nroots; i++) {
530       if (new_iremote[i].rank == -1) continue;
531       new_iremote[num_new_leaves] = new_iremote[i];
532       leafdata[num_new_leaves]    = i;
533       num_new_leaves++;
534     }
535     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
536 
537     PetscSF csf;
538     PetscCall(PetscSFCreate(comm, &csf));
539     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
540     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
541     PetscCall(PetscFree(leafdata));
542     PetscCall(PetscBTDestroy(&rootbt));
543 
544     PetscInt npoints;
545     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
546     if (npoints < 0) { // empty point_sf
547       *closure_sf = csf;
548     } else {
549       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
550       PetscCall(PetscSFDestroy(&csf));
551     }
552     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
553     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
554   }
555   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
556   PetscFunctionReturn(PETSC_SUCCESS);
557 }
558 
559 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
560 {
561   DM_Plex *plex = (DM_Plex *)dm->data;
562 
563   PetscFunctionBegin;
564   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
565   if (sf) *sf = plex->periodic.composed_sf;
566   PetscFunctionReturn(PETSC_SUCCESS);
567 }
568 
569 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
570 {
571   DM_Plex    *plex = (DM_Plex *)old_dm->data;
572   PetscSF     sf_point, *new_face_sfs;
573   PetscMPIInt rank;
574 
575   PetscFunctionBegin;
576   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
577   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
578   PetscCall(DMGetPointSF(dm, &sf_point));
579   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
580 
581   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
582     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
583     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
584     const PetscInt    *old_local, *point_local;
585     const PetscSFNode *old_remote, *point_remote;
586 
587     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
588     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
589     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
590     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
591     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
592 
593     // Fill new_leafdata with new owners of all points
594     for (PetscInt i = 0; i < new_npoints; i++) {
595       new_leafdata[i].rank  = rank;
596       new_leafdata[i].index = i;
597     }
598     for (PetscInt i = 0; i < point_nleaf; i++) {
599       PetscInt j      = point_local[i];
600       new_leafdata[j] = point_remote[i];
601     }
602     // REPLACE is okay because every leaf agrees about the new owners
603     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
604     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
605     // rootdata now contains the new owners
606 
607     // Send to leaves of old space
608     for (PetscInt i = 0; i < old_npoints; i++) {
609       leafdata[i].rank  = -1;
610       leafdata[i].index = -1;
611     }
612     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
613     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
614 
615     // Send to new leaf space
616     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
617     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
618 
619     PetscInt     nface = 0, *new_local;
620     PetscSFNode *new_remote;
621     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
622     PetscCall(PetscMalloc1(nface, &new_local));
623     PetscCall(PetscMalloc1(nface, &new_remote));
624     nface = 0;
625     for (PetscInt i = 0; i < new_npoints; i++) {
626       if (new_leafdata[i].rank == -1) continue;
627       new_local[nface]  = i;
628       new_remote[nface] = new_leafdata[i];
629       nface++;
630     }
631     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
632     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
633     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
634     {
635       char new_face_sf_name[PETSC_MAX_PATH_LEN];
636       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
637       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
638     }
639   }
640 
641   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
642   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
643   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
644   PetscCall(PetscFree(new_face_sfs));
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
648 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
649 {
650   DM_Plex   *plex = (DM_Plex *)dm->data;
651   PetscCount count;
652   IS         isdof;
653   PetscInt   dim;
654 
655   PetscFunctionBegin;
656   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
657   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
658   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
659 
660   PetscCall(DMGetDimension(dm, &dim));
661   dm->periodic.num_affines = plex->periodic.num_face_sfs;
662   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
663 
664   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
665     PetscInt        npoints, vsize, isize;
666     const PetscInt *points;
667     IS              is = plex->periodic.periodic_points[f];
668     PetscSegBuffer  seg;
669     PetscSection    section;
670     PetscInt       *ind;
671     Vec             L, P;
672     VecType         vec_type;
673     VecScatter      scatter;
674     PetscScalar    *x;
675 
676     PetscCall(DMGetLocalSection(dm, &section));
677     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
678     PetscCall(ISGetSize(is, &npoints));
679     PetscCall(ISGetIndices(is, &points));
680     for (PetscInt i = 0; i < npoints; i++) {
681       PetscInt point = points[i], off, dof;
682 
683       PetscCall(PetscSectionGetOffset(section, point, &off));
684       PetscCall(PetscSectionGetDof(section, point, &dof));
685       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
686       for (PetscInt j = 0; j < dof / dim; j++) {
687         PetscInt *slot;
688 
689         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
690         *slot = off / dim + j;
691       }
692     }
693     PetscCall(PetscSegBufferGetSize(seg, &count));
694     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
695     PetscCall(PetscSegBufferDestroy(&seg));
696     PetscCall(PetscIntCast(count, &isize));
697     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
698 
699     PetscCall(PetscIntCast(count * dim, &vsize));
700     PetscCall(DMGetLocalVector(dm, &L));
701     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
702     PetscCall(VecSetSizes(P, vsize, vsize));
703     PetscCall(VecGetType(L, &vec_type));
704     PetscCall(VecSetType(P, vec_type));
705     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
706     PetscCall(DMRestoreLocalVector(dm, &L));
707     PetscCall(ISDestroy(&isdof));
708 
709     PetscCall(VecGetArrayWrite(P, &x));
710     for (PetscCount i = 0; i < count; i++) {
711       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
712     }
713     PetscCall(VecRestoreArrayWrite(P, &x));
714 
715     dm->periodic.affine_to_local[f] = scatter;
716     dm->periodic.affine[f]          = P;
717   }
718   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
719   PetscFunctionReturn(PETSC_SUCCESS);
720 }
721 
722 // We'll just orient all the edges, though only periodic boundary edges need orientation
723 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
724 {
725   PetscInt dim, eStart, eEnd;
726 
727   PetscFunctionBegin;
728   PetscCall(DMGetDimension(dm, &dim));
729   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
730   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
731   for (PetscInt e = eStart; e < eEnd; e++) {
732     const PetscInt *cone;
733     PetscCall(DMPlexGetCone(dm, e, &cone));
734     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
735   }
736   PetscFunctionReturn(PETSC_SUCCESS);
737 }
738 
739 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
740 {
741   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
742   const Ijk closure_1[] = {
743     {0, 0, 0},
744     {1, 0, 0},
745   };
746   const Ijk closure_2[] = {
747     {0, 0, 0},
748     {1, 0, 0},
749     {1, 1, 0},
750     {0, 1, 0},
751   };
752   const Ijk closure_3[] = {
753     {0, 0, 0},
754     {0, 1, 0},
755     {1, 1, 0},
756     {1, 0, 0},
757     {0, 0, 1},
758     {1, 0, 1},
759     {1, 1, 1},
760     {0, 1, 1},
761   };
762   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
763   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
764   const PetscInt        face_marker_1[]   = {1, 2};
765   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
766   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
767   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
768   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
769   // These orientations can be determined by examining cones of a reference quad and hex element.
770   const PetscInt        face_orient_1[]   = {0, 0};
771   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
772   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
773   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
774 
775   PetscFunctionBegin;
776   PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
777   PetscAssertPointer(dm, 1);
778   PetscValidLogicalCollectiveInt(dm, dim, 2);
779   PetscCall(DMSetDimension(dm, dim));
780   PetscMPIInt rank, size;
781   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
782   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
783   for (PetscInt i = 0; i < dim; i++) {
784     eextent[i] = faces[i];
785     vextent[i] = faces[i] + 1;
786   }
787   ZLayout layout;
788   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
789   PetscZSet vset; // set of all vertices in the closure of the owned elements
790   PetscCall(PetscZSetCreate(&vset));
791   PetscInt local_elems = 0;
792   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
793     Ijk loc = ZCodeSplit(z);
794     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
795     else {
796       z += ZStepOct(z);
797       continue;
798     }
799     if (IjkActive(layout.eextent, loc)) {
800       local_elems++;
801       // Add all neighboring vertices to set
802       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
803         Ijk   inc  = closure_dim[dim][n];
804         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
805         ZCode v    = ZEncode(nloc);
806         PetscCall(PetscZSetAdd(vset, v));
807       }
808     }
809   }
810   PetscInt local_verts, off = 0;
811   ZCode   *vert_z;
812   PetscCall(PetscZSetGetSize(vset, &local_verts));
813   PetscCall(PetscMalloc1(local_verts, &vert_z));
814   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
815   PetscCall(PetscZSetDestroy(&vset));
816   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
817   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
818 
819   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
820   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
821   PetscCall(DMSetUp(dm));
822   {
823     PetscInt e = 0;
824     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
825       Ijk loc = ZCodeSplit(z);
826       if (!IjkActive(layout.eextent, loc)) {
827         z += ZStepOct(z);
828         continue;
829       }
830       PetscInt cone[8], orient[8] = {0};
831       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
832         Ijk      inc  = closure_dim[dim][n];
833         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
834         ZCode    v    = ZEncode(nloc);
835         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
836         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
837         cone[n] = local_elems + ci;
838       }
839       PetscCall(DMPlexSetCone(dm, e, cone));
840       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
841       e++;
842     }
843   }
844 
845   PetscCall(DMPlexSymmetrize(dm));
846   PetscCall(DMPlexStratify(dm));
847 
848   { // Create point SF
849     PetscSF sf;
850     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
851     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
852     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
853     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
854     PetscInt    *local_ghosts;
855     PetscSFNode *ghosts;
856     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
857     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
858     for (PetscInt i = 0; i < num_ghosts;) {
859       ZCode       z = vert_z[owned_verts + i];
860       PetscMPIInt remote_rank, remote_count = 0;
861 
862       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank));
863       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
864       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
865 
866       // Count the elements on remote_rank
867       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
868 
869       // Traverse vertices and make ghost links
870       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
871         Ijk loc = ZCodeSplit(rz);
872         if (rz == z) {
873           local_ghosts[i] = local_elems + owned_verts + i;
874           ghosts[i].rank  = remote_rank;
875           ghosts[i].index = remote_elem + remote_count;
876           i++;
877           if (i == num_ghosts) break;
878           z = vert_z[owned_verts + i];
879         }
880         if (IjkActive(layout.vextent, loc)) remote_count++;
881         else rz += ZStepOct(rz);
882       }
883     }
884     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
885     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
886     PetscCall(DMSetPointSF(dm, sf));
887     PetscCall(PetscSFDestroy(&sf));
888   }
889   {
890     Vec          coordinates;
891     PetscScalar *coords;
892     PetscSection coord_section;
893     PetscInt     coord_size;
894     PetscCall(DMGetCoordinateSection(dm, &coord_section));
895     PetscCall(PetscSectionSetNumFields(coord_section, 1));
896     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
897     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
898     for (PetscInt v = 0; v < local_verts; v++) {
899       PetscInt point = local_elems + v;
900       PetscCall(PetscSectionSetDof(coord_section, point, dim));
901       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
902     }
903     PetscCall(PetscSectionSetUp(coord_section));
904     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
905     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
906     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
907     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
908     PetscCall(VecSetBlockSize(coordinates, dim));
909     PetscCall(VecSetType(coordinates, VECSTANDARD));
910     PetscCall(VecGetArray(coordinates, &coords));
911     for (PetscInt v = 0; v < local_verts; v++) {
912       Ijk loc             = ZCodeSplit(vert_z[v]);
913       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
914       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
915       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
916     }
917     PetscCall(VecRestoreArray(coordinates, &coords));
918     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
919     PetscCall(VecDestroy(&coordinates));
920   }
921   if (interpolate) {
922     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
923     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
924     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
925     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
926     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
927     // be needed in a general CGNS reader, for example.
928     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
929 
930     DMLabel label;
931     PetscCall(DMCreateLabel(dm, "Face Sets"));
932     PetscCall(DMGetLabel(dm, "Face Sets", &label));
933     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
934     for (PetscInt i = 0; i < 3; i++) {
935       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
936       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
937       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
938     }
939     PetscInt fStart, fEnd, vStart, vEnd;
940     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
941     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
942     for (PetscInt f = fStart; f < fEnd; f++) {
943       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
944       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
945       PetscInt bc_count[6] = {0};
946       for (PetscInt i = 0; i < npoints; i++) {
947         PetscInt p = points[2 * i];
948         if (p < vStart || vEnd <= p) continue;
949         fverts[num_fverts++] = p;
950         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
951         // Convention here matches DMPlexCreateCubeMesh_Internal
952         bc_count[0] += loc.i == 0;
953         bc_count[1] += loc.i == layout.vextent.i - 1;
954         bc_count[2] += loc.j == 0;
955         bc_count[3] += loc.j == layout.vextent.j - 1;
956         bc_count[4] += loc.k == 0;
957         bc_count[5] += loc.k == layout.vextent.k - 1;
958       }
959       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
960       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
961         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
962           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
963           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
964             PetscInt *put;
965             if (bc % 2 == 0) { // donor face; no label
966               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
967               *put = f;
968             } else { // periodic face
969               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
970               *put = f;
971               ZCode *zput;
972               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
973               for (PetscInt i = 0; i < num_fverts; i++) {
974                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
975                 switch (bc / 2) {
976                 case 0:
977                   loc.i = 0;
978                   break;
979                 case 1:
980                   loc.j = 0;
981                   break;
982                 case 2:
983                   loc.k = 0;
984                   break;
985                 }
986                 *zput++ = ZEncode(loc);
987               }
988             }
989             continue;
990           }
991           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
992           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
993           bc_match++;
994         }
995       }
996     }
997     // Ensure that the Coordinate DM has our new boundary labels
998     DM cdm;
999     PetscCall(DMGetCoordinateDM(dm, &cdm));
1000     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
1001     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
1002       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
1003     }
1004     for (PetscInt i = 0; i < 3; i++) {
1005       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
1006       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
1007       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
1008     }
1009   }
1010   PetscCall(PetscFree(layout.zstarts));
1011   PetscCall(PetscFree(vert_z));
1012   PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 /*@
1017   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
1018 
1019   Logically Collective
1020 
1021   Input Parameters:
1022 + dm           - The `DMPLEX` on which to set periodicity
1023 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
1024 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1025 
1026   Level: advanced
1027 
1028   Note:
1029   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
1030   and locally, but are paired when creating a global dof space.
1031 
1032 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
1033 @*/
1034 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
1035 {
1036   DM_Plex *plex = (DM_Plex *)dm->data;
1037 
1038   PetscFunctionBegin;
1039   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1040   if (num_face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
1041   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
1042 
1043   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
1044 
1045   if (plex->periodic.num_face_sfs > 0) {
1046     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
1047     PetscCall(PetscFree(plex->periodic.face_sfs));
1048   }
1049 
1050   plex->periodic.num_face_sfs = num_face_sfs;
1051   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
1052   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1053 
1054   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1055   if (cdm) {
1056     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1057     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1058   }
1059   PetscFunctionReturn(PETSC_SUCCESS);
1060 }
1061 
1062 /*@C
1063   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1064 
1065   Logically Collective
1066 
1067   Input Parameter:
1068 . dm - The `DMPLEX` for which to obtain periodic relation
1069 
1070   Output Parameters:
1071 + num_face_sfs - Number of `PetscSF`s in the array
1072 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1073 
1074   Level: advanced
1075 
1076 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1077 @*/
1078 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1079 {
1080   DM_Plex *plex = (DM_Plex *)dm->data;
1081 
1082   PetscFunctionBegin;
1083   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1084   *face_sfs     = plex->periodic.face_sfs;
1085   *num_face_sfs = plex->periodic.num_face_sfs;
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 /*@C
1090   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1091 
1092   Logically Collective
1093 
1094   Input Parameters:
1095 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1096 . n  - Number of transforms in array
1097 - t  - Array of 4x4 affine transformation basis.
1098 
1099   Level: advanced
1100 
1101   Notes:
1102   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1103   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1104   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1105   simple matrix multiplication.
1106 
1107   Although the interface accepts a general affine transform, only affine translation is supported at present.
1108 
1109   Developer Notes:
1110   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1111   adding GPU implementations to apply the G2L/L2G transforms.
1112 
1113 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1114 @*/
1115 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1116 {
1117   DM_Plex *plex = (DM_Plex *)dm->data;
1118 
1119   PetscFunctionBegin;
1120   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1121   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1122 
1123   PetscCall(PetscFree(plex->periodic.transform));
1124   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1125   for (PetscInt i = 0; i < n; i++) {
1126     for (PetscInt j = 0; j < 4; j++) {
1127       for (PetscInt k = 0; k < 4; k++) {
1128         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1129         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1130       }
1131     }
1132   }
1133   PetscFunctionReturn(PETSC_SUCCESS);
1134 }
1135