1 #include <petsc/private/dmpleximpl.h> /*I "petscdmplex.h" I*/ 2 #include <petscsf.h> 3 #include <petsc/private/hashset.h> 4 5 typedef uint64_t ZCode; 6 7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual) 8 9 typedef struct { 10 PetscInt i, j, k; 11 } Ijk; 12 13 typedef struct { 14 Ijk eextent; 15 Ijk vextent; 16 PetscMPIInt comm_size; 17 ZCode *zstarts; 18 } ZLayout; 19 20 // ***** Overview of ZCode ******* 21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index. 22 // This is known as Morton encoding, and is refered to as ZCode in this code. 23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be: 24 // [k2,j2,i2,k1,j1,i1,k0,j0,i0] 25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`). 26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices. 27 28 // Decodes the leading interleaved index from a ZCode 29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0] 30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal) 31 static unsigned ZCodeSplit1(ZCode z) 32 { 33 z &= 0111111111111111111111; 34 z = (z | z >> 2) & 0103030303030303030303; 35 z = (z | z >> 4) & 0100170017001700170017; 36 z = (z | z >> 8) & 0370000037700000377; 37 z = (z | z >> 16) & 0370000000000177777; 38 z = (z | z >> 32) & 07777777; 39 return (unsigned)z; 40 } 41 42 // Encodes the leading interleaved index from a ZCode 43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0] 44 static ZCode ZEncode1(unsigned t) 45 { 46 ZCode z = t; 47 z &= 07777777; 48 z = (z | z << 32) & 0370000000000177777; 49 z = (z | z << 16) & 0370000037700000377; 50 z = (z | z << 8) & 0100170017001700170017; 51 z = (z | z << 4) & 0103030303030303030303; 52 z = (z | z << 2) & 0111111111111111111111; 53 return z; 54 } 55 56 // Decodes i j k indices from a ZCode. 57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode 58 static Ijk ZCodeSplit(ZCode z) 59 { 60 Ijk c; 61 c.i = ZCodeSplit1(z >> 2); 62 c.j = ZCodeSplit1(z >> 1); 63 c.k = ZCodeSplit1(z >> 0); 64 return c; 65 } 66 67 // Encodes i j k indices to a ZCode. 68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement 69 static ZCode ZEncode(Ijk c) 70 { 71 ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k); 72 return z; 73 } 74 75 static PetscBool IjkActive(Ijk extent, Ijk l) 76 { 77 if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE; 78 return PETSC_FALSE; 79 } 80 81 // If z is not the base of an octet (last 3 bits 0), return 0. 82 // 83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z 84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point. 85 static ZCode ZStepOct(ZCode z) 86 { 87 if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0 88 ZCode step = 07; 89 for (; (z & step) == 0; step = (step << 3) | 07) { } 90 return step >> 3; 91 } 92 93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous. 94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout) 95 { 96 PetscFunctionBegin; 97 layout->eextent.i = eextent[0]; 98 layout->eextent.j = eextent[1]; 99 layout->eextent.k = eextent[2]; 100 layout->vextent.i = vextent[0]; 101 layout->vextent.j = vextent[1]; 102 layout->vextent.k = vextent[2]; 103 layout->comm_size = size; 104 layout->zstarts = NULL; 105 PetscCall(PetscMalloc1(size + 1, &layout->zstarts)); 106 107 PetscInt total_elems = eextent[0] * eextent[1] * eextent[2]; 108 ZCode z = 0; 109 layout->zstarts[0] = 0; 110 // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound 111 for (PetscMPIInt r = 0; r < size; r++) { 112 PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count; 113 for (count = 0; count < elems_needed; z++) { 114 ZCode skip = ZStepOct(z); // optimistically attempt a longer step 115 for (ZCode s = skip;; s >>= 3) { 116 Ijk trial = ZCodeSplit(z + s); 117 if (IjkActive(layout->eextent, trial)) { 118 while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet 119 count += s + 1; 120 z += s; 121 break; 122 } 123 if (s == 0) { // the whole skip octet is inactive 124 z += skip; 125 break; 126 } 127 } 128 } 129 // Pick up any extra vertices in the Z ordering before the next rank's first owned element. 130 // 131 // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up 132 // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will 133 // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of 134 // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution). 135 // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would 136 // complicate the job of identifying an owner and its offset. 137 // 138 // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is 139 // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's 140 // the issue: 141 // 142 // Consider this partition on rank 0 (left) and rank 1. 143 // 144 // 4 -------- 5 -- 14 --10 -- 21 --11 145 // | | | 146 // 7 -- 16 -- 8 | | | 147 // | | 3 ------- 7 ------- 9 148 // | | | | 149 // 4 -------- 6 ------ 10 | | 150 // | | | 6 -- 16 -- 8 151 // | | | 152 // 3 ---11--- 5 --18-- 9 153 // 154 // The periodic face SF looks like 155 // [0] Number of roots=21, leaves=1, remote ranks=1 156 // [0] 16 <- (0,11) 157 // [1] Number of roots=22, leaves=2, remote ranks=2 158 // [1] 14 <- (0,18) 159 // [1] 21 <- (1,16) 160 // 161 // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use 162 // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face 163 // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16). 164 // 165 // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not 166 // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer 167 // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4) 168 // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the 169 // stranded vertices. 170 for (; z <= ZEncode(layout->vextent); z++) { 171 Ijk loc = ZCodeSplit(z); 172 if (IjkActive(layout->eextent, loc)) break; 173 z += ZStepOct(z); 174 } 175 layout->zstarts[r + 1] = z; 176 } 177 layout->zstarts[size] = ZEncode(layout->vextent); 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank) 182 { 183 PetscInt remote_elem = 0; 184 for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) { 185 Ijk loc = ZCodeSplit(rz); 186 if (IjkActive(layout->eextent, loc)) remote_elem++; 187 else rz += ZStepOct(rz); 188 } 189 return remote_elem; 190 } 191 192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[]) 193 { 194 PetscInt lo = 0, hi = n; 195 196 if (n == 0) return -1; 197 while (hi - lo > 1) { 198 PetscInt mid = lo + (hi - lo) / 2; 199 if (key < X[mid]) hi = mid; 200 else lo = mid; 201 } 202 return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1); 203 } 204 205 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3]) 206 { 207 MPI_Comm comm; 208 PetscInt dim, vStart, vEnd; 209 PetscMPIInt size; 210 PetscSF face_sfs[3]; 211 PetscScalar transforms[3][4][4] = {{{0}}}; 212 213 PetscFunctionBegin; 214 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 215 PetscCallMPI(MPI_Comm_size(comm, &size)); 216 PetscCall(DMGetDimension(dm, &dim)); 217 const PetscInt csize = PetscPowInt(2, dim - 1); 218 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 219 220 PetscInt num_directions = 0; 221 for (PetscInt direction = 0; direction < dim; direction++) { 222 PetscCount num_faces; 223 PetscInt *faces; 224 ZCode *donor_verts, *donor_minz; 225 PetscSFNode *leaf; 226 PetscCount num_multiroots = 0; 227 PetscInt pStart, pEnd; 228 PetscBool sorted; 229 PetscInt inum_faces; 230 231 if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue; 232 PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces)); 233 PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces)); 234 PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts)); 235 PetscCall(PetscMalloc1(num_faces, &donor_minz)); 236 PetscCall(PetscMalloc1(num_faces, &leaf)); 237 for (PetscCount i = 0; i < num_faces; i++) { 238 ZCode minz = donor_verts[i * csize]; 239 240 for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]); 241 donor_minz[i] = minz; 242 } 243 PetscCall(PetscIntCast(num_faces, &inum_faces)); 244 PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted)); 245 // If a donor vertex were chosen to broker multiple faces, we would have a logic error. 246 // Checking for sorting is a cheap check that there are no duplicates. 247 PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked"); 248 for (PetscCount i = 0; i < num_faces;) { 249 ZCode z = donor_minz[i]; 250 PetscMPIInt remote_rank, remote_count = 0; 251 252 PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank)); 253 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 254 // Process all the vertices on this rank 255 for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) { 256 Ijk loc = ZCodeSplit(rz); 257 258 if (rz == z) { 259 leaf[i].rank = remote_rank; 260 leaf[i].index = remote_count; 261 i++; 262 if (i == num_faces) break; 263 z = donor_minz[i]; 264 } 265 if (IjkActive(layout->vextent, loc)) remote_count++; 266 } 267 } 268 PetscCall(PetscFree(donor_minz)); 269 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions])); 270 PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER)); 271 const PetscInt *my_donor_degree; 272 PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree)); 273 PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree)); 274 275 for (PetscInt i = 0; i < vEnd - vStart; i++) { 276 num_multiroots += my_donor_degree[i]; 277 if (my_donor_degree[i] == 0) continue; 278 PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces"); 279 } 280 PetscInt *my_donors, *donor_indices, *my_donor_indices; 281 PetscCount num_my_donors; 282 283 PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors)); 284 PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots); 285 PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors)); 286 PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices)); 287 for (PetscCount i = 0; i < num_my_donors; i++) { 288 PetscInt f = my_donors[i]; 289 PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX; 290 291 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 292 for (PetscInt j = 0; j < num_points; j++) { 293 PetscInt p = points[2 * j]; 294 if (p < vStart || vEnd <= p) continue; 295 minv = PetscMin(minv, p); 296 } 297 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 298 PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested"); 299 my_donor_indices[minv - vStart] = f; 300 } 301 PetscCall(PetscMalloc1(num_faces, &donor_indices)); 302 PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 303 PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 304 PetscCall(PetscFree(my_donor_indices)); 305 // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces. 306 for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i]; 307 PetscCall(PetscFree(donor_indices)); 308 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 309 PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER)); 310 { 311 char face_sf_name[PETSC_MAX_PATH_LEN]; 312 PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions)); 313 PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name)); 314 } 315 316 transforms[num_directions][0][0] = 1; 317 transforms[num_directions][1][1] = 1; 318 transforms[num_directions][2][2] = 1; 319 transforms[num_directions][3][3] = 1; 320 transforms[num_directions][direction][3] = upper[direction] - lower[direction]; 321 num_directions++; 322 } 323 324 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs)); 325 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms)); 326 327 for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i])); 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to 332 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector). 333 // We use this crude approach here so we don't have to write new GPU kernels yet. 334 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx) 335 { 336 PetscFunctionBegin; 337 // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped. 338 for (PetscInt i = 0; i < dm->periodic.num_affines; i++) { 339 PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 340 PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 341 } 342 PetscFunctionReturn(PETSC_SUCCESS); 343 } 344 345 // Creates SF to communicate data from donor to periodic faces. The data can be different sizes per donor-periodic pair and is given in `point_sizes[]` 346 PetscErrorCode CreateDonorToPeriodicSF(DM dm, PetscSF face_sf, PetscInt pStart, PetscInt pEnd, const PetscInt point_sizes[], PetscInt *rootbuffersize, PetscInt *leafbuffersize, PetscBT *rootbt, PetscSF *sf_closure) 347 { 348 MPI_Comm comm; 349 PetscMPIInt rank; 350 PetscInt nroots, nleaves; 351 PetscInt *rootdata, *leafdata; 352 const PetscInt *filocal; 353 const PetscSFNode *firemote; 354 355 PetscFunctionBeginUser; 356 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 357 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 358 359 PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote)); 360 PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata)); 361 for (PetscInt i = 0; i < nleaves; i++) { 362 PetscInt point = filocal[i]; 363 PetscCheck(point >= pStart && point < pEnd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in leaves exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd); 364 leafdata[point] = point_sizes[point - pStart]; 365 } 366 PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 367 PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 368 369 PetscInt root_offset = 0; 370 PetscCall(PetscBTCreate(nroots, rootbt)); 371 for (PetscInt p = 0; p < nroots; p++) { 372 const PetscInt *donor_dof = rootdata + nroots; 373 if (donor_dof[p] == 0) { 374 rootdata[2 * p] = -1; 375 rootdata[2 * p + 1] = -1; 376 continue; 377 } 378 PetscCall(PetscBTSet(*rootbt, p)); 379 PetscCheck(p >= pStart && p < pEnd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in roots exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", p, pStart, pEnd); 380 PetscInt p_size = point_sizes[p - pStart]; 381 PetscCheck(donor_dof[p] == p_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf data size (%" PetscInt_FMT ") does not match root data size (%" PetscInt_FMT ")", donor_dof[p], p_size); 382 rootdata[2 * p] = root_offset; 383 rootdata[2 * p + 1] = p_size; 384 root_offset += p_size; 385 } 386 PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 387 PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 388 // Count how many leaves we need to communicate the closures 389 PetscInt leaf_offset = 0; 390 for (PetscInt i = 0; i < nleaves; i++) { 391 PetscInt point = filocal[i]; 392 if (leafdata[2 * point + 1] < 0) continue; 393 leaf_offset += leafdata[2 * point + 1]; 394 } 395 396 PetscSFNode *closure_leaf; 397 PetscCall(PetscMalloc1(leaf_offset, &closure_leaf)); 398 leaf_offset = 0; 399 for (PetscInt i = 0; i < nleaves; i++) { 400 PetscInt point = filocal[i]; 401 PetscInt cl_size = leafdata[2 * point + 1]; 402 if (cl_size < 0) continue; 403 for (PetscInt j = 0; j < cl_size; j++) { 404 closure_leaf[leaf_offset].rank = firemote[i].rank; 405 closure_leaf[leaf_offset].index = leafdata[2 * point] + j; 406 leaf_offset++; 407 } 408 } 409 410 PetscCall(PetscSFCreate(comm, sf_closure)); 411 PetscCall(PetscSFSetGraph(*sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER)); 412 *rootbuffersize = root_offset; 413 *leafbuffersize = leaf_offset; 414 PetscCall(PetscFree2(rootdata, leafdata)); 415 PetscFunctionReturn(PETSC_SUCCESS); 416 } 417 418 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure 419 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures 420 // are isomorphic. It may be useful/necessary for this restriction to be loosened. 421 // 422 // Output Arguments: 423 // 424 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This 425 // can be used to create a global section and section SF. 426 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine 427 // transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal(). 428 // 429 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points) 430 { 431 MPI_Comm comm; 432 PetscMPIInt rank; 433 PetscSF point_sf; 434 PetscInt nroots, nleaves; 435 const PetscInt *filocal; 436 const PetscSFNode *firemote; 437 438 PetscFunctionBegin; 439 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 440 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 441 PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points 442 PetscCall(PetscMalloc1(num_face_sfs, is_points)); 443 444 for (PetscInt f = 0; f < num_face_sfs; f++) { 445 PetscSF face_sf = face_sfs[f]; 446 PetscInt *cl_sizes; 447 PetscInt fStart, fEnd, rootbuffersize, leafbuffersize; 448 PetscSF sf_closure; 449 PetscBT rootbt; 450 451 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 452 PetscCall(PetscMalloc1(fEnd - fStart, &cl_sizes)); 453 for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) { 454 PetscInt cl_size, *closure = NULL; 455 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure)); 456 cl_sizes[index] = cl_size - 1; 457 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure)); 458 } 459 460 PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, cl_sizes, &rootbuffersize, &leafbuffersize, &rootbt, &sf_closure)); 461 PetscCall(PetscFree(cl_sizes)); 462 PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote)); 463 464 PetscSFNode *leaf_donor_closure; 465 { // Pack root buffer with owner for every point in the root cones 466 PetscSFNode *donor_closure; 467 const PetscInt *pilocal; 468 const PetscSFNode *piremote; 469 PetscInt npoints; 470 471 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote)); 472 PetscCall(PetscCalloc1(rootbuffersize, &donor_closure)); 473 for (PetscInt p = 0, root_offset = 0; p < nroots; p++) { 474 if (!PetscBTLookup(rootbt, p)) continue; 475 PetscInt cl_size, *closure = NULL; 476 PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 477 for (PetscInt j = 1; j < cl_size; j++) { 478 PetscInt c = closure[2 * j]; 479 if (pilocal) { 480 PetscInt found = -1; 481 if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found)); 482 if (found >= 0) { 483 donor_closure[root_offset++] = piremote[found]; 484 continue; 485 } 486 } 487 // we own c 488 donor_closure[root_offset].rank = rank; 489 donor_closure[root_offset].index = c; 490 root_offset++; 491 } 492 PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 493 } 494 495 PetscCall(PetscMalloc1(leafbuffersize, &leaf_donor_closure)); 496 PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 497 PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 498 PetscCall(PetscSFDestroy(&sf_closure)); 499 PetscCall(PetscFree(donor_closure)); 500 } 501 502 PetscSFNode *new_iremote; 503 PetscCall(PetscCalloc1(nroots, &new_iremote)); 504 for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1; 505 // Walk leaves and match vertices 506 for (PetscInt i = 0, leaf_offset = 0; i < nleaves; i++) { 507 PetscInt point = filocal[i], cl_size; 508 PetscInt *closure = NULL; 509 PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 510 for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency? 511 PetscInt c = closure[2 * j]; 512 PetscSFNode lc = leaf_donor_closure[leaf_offset]; 513 // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index); 514 if (new_iremote[c].rank == -1) { 515 new_iremote[c] = lc; 516 } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces"); 517 leaf_offset++; 518 } 519 PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 520 } 521 PetscCall(PetscFree(leaf_donor_closure)); 522 523 // Include face points in closure SF 524 for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i]; 525 // consolidate leaves 526 PetscInt *leafdata; 527 PetscCall(PetscMalloc1(nroots, &leafdata)); 528 PetscInt num_new_leaves = 0; 529 for (PetscInt i = 0; i < nroots; i++) { 530 if (new_iremote[i].rank == -1) continue; 531 new_iremote[num_new_leaves] = new_iremote[i]; 532 leafdata[num_new_leaves] = i; 533 num_new_leaves++; 534 } 535 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f])); 536 537 PetscSF csf; 538 PetscCall(PetscSFCreate(comm, &csf)); 539 PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES)); 540 PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be 541 PetscCall(PetscFree(leafdata)); 542 PetscCall(PetscBTDestroy(&rootbt)); 543 544 PetscInt npoints; 545 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL)); 546 if (npoints < 0) { // empty point_sf 547 *closure_sf = csf; 548 } else { 549 PetscCall(PetscSFMerge(point_sf, csf, closure_sf)); 550 PetscCall(PetscSFDestroy(&csf)); 551 } 552 if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge 553 point_sf = *closure_sf; // Use combined point + isoperiodic SF to define point ownership for further face_sf 554 } 555 PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points")); 556 PetscFunctionReturn(PETSC_SUCCESS); 557 } 558 559 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf) 560 { 561 DM_Plex *plex = (DM_Plex *)dm->data; 562 563 PetscFunctionBegin; 564 if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points)); 565 if (sf) *sf = plex->periodic.composed_sf; 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration) 570 { 571 DM_Plex *plex = (DM_Plex *)old_dm->data; 572 PetscSF sf_point, *new_face_sfs; 573 PetscMPIInt rank; 574 575 PetscFunctionBegin; 576 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 577 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 578 PetscCall(DMGetPointSF(dm, &sf_point)); 579 PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs)); 580 581 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 582 PetscInt old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf; 583 PetscSFNode *new_leafdata, *rootdata, *leafdata; 584 const PetscInt *old_local, *point_local; 585 const PetscSFNode *old_remote, *point_remote; 586 587 PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote)); 588 PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL)); 589 PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote)); 590 PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space"); 591 PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata)); 592 593 // Fill new_leafdata with new owners of all points 594 for (PetscInt i = 0; i < new_npoints; i++) { 595 new_leafdata[i].rank = rank; 596 new_leafdata[i].index = i; 597 } 598 for (PetscInt i = 0; i < point_nleaf; i++) { 599 PetscInt j = point_local[i]; 600 new_leafdata[j] = point_remote[i]; 601 } 602 // REPLACE is okay because every leaf agrees about the new owners 603 PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 604 PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 605 // rootdata now contains the new owners 606 607 // Send to leaves of old space 608 for (PetscInt i = 0; i < old_npoints; i++) { 609 leafdata[i].rank = -1; 610 leafdata[i].index = -1; 611 } 612 PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 613 PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 614 615 // Send to new leaf space 616 PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 617 PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 618 619 PetscInt nface = 0, *new_local; 620 PetscSFNode *new_remote; 621 for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0); 622 PetscCall(PetscMalloc1(nface, &new_local)); 623 PetscCall(PetscMalloc1(nface, &new_remote)); 624 nface = 0; 625 for (PetscInt i = 0; i < new_npoints; i++) { 626 if (new_leafdata[i].rank == -1) continue; 627 new_local[nface] = i; 628 new_remote[nface] = new_leafdata[i]; 629 nface++; 630 } 631 PetscCall(PetscFree3(rootdata, leafdata, new_leafdata)); 632 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f])); 633 PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER)); 634 { 635 char new_face_sf_name[PETSC_MAX_PATH_LEN]; 636 PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f)); 637 PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name)); 638 } 639 } 640 641 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs)); 642 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform)); 643 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f])); 644 PetscCall(PetscFree(new_face_sfs)); 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 648 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm) 649 { 650 DM_Plex *plex = (DM_Plex *)dm->data; 651 PetscCount count; 652 IS isdof; 653 PetscInt dim; 654 655 PetscFunctionBegin; 656 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 657 PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL)); 658 PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 659 660 PetscCall(DMGetDimension(dm, &dim)); 661 dm->periodic.num_affines = plex->periodic.num_face_sfs; 662 PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine)); 663 664 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 665 PetscInt npoints, vsize, isize; 666 const PetscInt *points; 667 IS is = plex->periodic.periodic_points[f]; 668 PetscSegBuffer seg; 669 PetscSection section; 670 PetscInt *ind; 671 Vec L, P; 672 VecType vec_type; 673 VecScatter scatter; 674 PetscScalar *x; 675 676 PetscCall(DMGetLocalSection(dm, §ion)); 677 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg)); 678 PetscCall(ISGetSize(is, &npoints)); 679 PetscCall(ISGetIndices(is, &points)); 680 for (PetscInt i = 0; i < npoints; i++) { 681 PetscInt point = points[i], off, dof; 682 683 PetscCall(PetscSectionGetOffset(section, point, &off)); 684 PetscCall(PetscSectionGetDof(section, point, &dof)); 685 PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim); 686 for (PetscInt j = 0; j < dof / dim; j++) { 687 PetscInt *slot; 688 689 PetscCall(PetscSegBufferGetInts(seg, 1, &slot)); 690 *slot = off / dim + j; 691 } 692 } 693 PetscCall(PetscSegBufferGetSize(seg, &count)); 694 PetscCall(PetscSegBufferExtractAlloc(seg, &ind)); 695 PetscCall(PetscSegBufferDestroy(&seg)); 696 PetscCall(PetscIntCast(count, &isize)); 697 PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof)); 698 699 PetscCall(PetscIntCast(count * dim, &vsize)); 700 PetscCall(DMGetLocalVector(dm, &L)); 701 PetscCall(VecCreate(PETSC_COMM_SELF, &P)); 702 PetscCall(VecSetSizes(P, vsize, vsize)); 703 PetscCall(VecGetType(L, &vec_type)); 704 PetscCall(VecSetType(P, vec_type)); 705 PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter)); 706 PetscCall(DMRestoreLocalVector(dm, &L)); 707 PetscCall(ISDestroy(&isdof)); 708 709 PetscCall(VecGetArrayWrite(P, &x)); 710 for (PetscCount i = 0; i < count; i++) { 711 for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3]; 712 } 713 PetscCall(VecRestoreArrayWrite(P, &x)); 714 715 dm->periodic.affine_to_local[f] = scatter; 716 dm->periodic.affine[f] = P; 717 } 718 PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL)); 719 PetscFunctionReturn(PETSC_SUCCESS); 720 } 721 722 // We'll just orient all the edges, though only periodic boundary edges need orientation 723 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm) 724 { 725 PetscInt dim, eStart, eEnd; 726 727 PetscFunctionBegin; 728 PetscCall(DMGetDimension(dm, &dim)); 729 if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary 730 PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd)); 731 for (PetscInt e = eStart; e < eEnd; e++) { 732 const PetscInt *cone; 733 PetscCall(DMPlexGetCone(dm, e, &cone)); 734 if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1)); 735 } 736 PetscFunctionReturn(PETSC_SUCCESS); 737 } 738 739 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate) 740 { 741 PetscInt eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1}; 742 const Ijk closure_1[] = { 743 {0, 0, 0}, 744 {1, 0, 0}, 745 }; 746 const Ijk closure_2[] = { 747 {0, 0, 0}, 748 {1, 0, 0}, 749 {1, 1, 0}, 750 {0, 1, 0}, 751 }; 752 const Ijk closure_3[] = { 753 {0, 0, 0}, 754 {0, 1, 0}, 755 {1, 1, 0}, 756 {1, 0, 0}, 757 {0, 0, 1}, 758 {1, 0, 1}, 759 {1, 1, 1}, 760 {0, 1, 1}, 761 }; 762 const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3}; 763 // This must be kept consistent with DMPlexCreateCubeMesh_Internal 764 const PetscInt face_marker_1[] = {1, 2}; 765 const PetscInt face_marker_2[] = {4, 2, 1, 3}; 766 const PetscInt face_marker_3[] = {6, 5, 3, 4, 1, 2}; 767 const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3}; 768 // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero. 769 // These orientations can be determined by examining cones of a reference quad and hex element. 770 const PetscInt face_orient_1[] = {0, 0}; 771 const PetscInt face_orient_2[] = {-1, 0, 0, -1}; 772 const PetscInt face_orient_3[] = {-2, 0, -2, 1, -2, 0}; 773 const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3}; 774 775 PetscFunctionBegin; 776 PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 777 PetscAssertPointer(dm, 1); 778 PetscValidLogicalCollectiveInt(dm, dim, 2); 779 PetscCall(DMSetDimension(dm, dim)); 780 PetscMPIInt rank, size; 781 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size)); 782 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 783 for (PetscInt i = 0; i < dim; i++) { 784 eextent[i] = faces[i]; 785 vextent[i] = faces[i] + 1; 786 } 787 ZLayout layout; 788 PetscCall(ZLayoutCreate(size, eextent, vextent, &layout)); 789 PetscZSet vset; // set of all vertices in the closure of the owned elements 790 PetscCall(PetscZSetCreate(&vset)); 791 PetscInt local_elems = 0; 792 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 793 Ijk loc = ZCodeSplit(z); 794 if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z)); 795 else { 796 z += ZStepOct(z); 797 continue; 798 } 799 if (IjkActive(layout.eextent, loc)) { 800 local_elems++; 801 // Add all neighboring vertices to set 802 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 803 Ijk inc = closure_dim[dim][n]; 804 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 805 ZCode v = ZEncode(nloc); 806 PetscCall(PetscZSetAdd(vset, v)); 807 } 808 } 809 } 810 PetscInt local_verts, off = 0; 811 ZCode *vert_z; 812 PetscCall(PetscZSetGetSize(vset, &local_verts)); 813 PetscCall(PetscMalloc1(local_verts, &vert_z)); 814 PetscCall(PetscZSetGetElems(vset, &off, vert_z)); 815 PetscCall(PetscZSetDestroy(&vset)); 816 // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed 817 PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z)); 818 819 PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts)); 820 for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim))); 821 PetscCall(DMSetUp(dm)); 822 { 823 PetscInt e = 0; 824 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 825 Ijk loc = ZCodeSplit(z); 826 if (!IjkActive(layout.eextent, loc)) { 827 z += ZStepOct(z); 828 continue; 829 } 830 PetscInt cone[8], orient[8] = {0}; 831 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 832 Ijk inc = closure_dim[dim][n]; 833 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 834 ZCode v = ZEncode(nloc); 835 PetscInt ci = ZCodeFind(v, local_verts, vert_z); 836 PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set"); 837 cone[n] = local_elems + ci; 838 } 839 PetscCall(DMPlexSetCone(dm, e, cone)); 840 PetscCall(DMPlexSetConeOrientation(dm, e, orient)); 841 e++; 842 } 843 } 844 845 PetscCall(DMPlexSymmetrize(dm)); 846 PetscCall(DMPlexStratify(dm)); 847 848 { // Create point SF 849 PetscSF sf; 850 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf)); 851 PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z); 852 if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found 853 PetscInt num_ghosts = local_verts - owned_verts; // Due to sorting, owned vertices always come first 854 PetscInt *local_ghosts; 855 PetscSFNode *ghosts; 856 PetscCall(PetscMalloc1(num_ghosts, &local_ghosts)); 857 PetscCall(PetscMalloc1(num_ghosts, &ghosts)); 858 for (PetscInt i = 0; i < num_ghosts;) { 859 ZCode z = vert_z[owned_verts + i]; 860 PetscMPIInt remote_rank, remote_count = 0; 861 862 PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank)); 863 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 864 // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z) 865 866 // Count the elements on remote_rank 867 PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank); 868 869 // Traverse vertices and make ghost links 870 for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) { 871 Ijk loc = ZCodeSplit(rz); 872 if (rz == z) { 873 local_ghosts[i] = local_elems + owned_verts + i; 874 ghosts[i].rank = remote_rank; 875 ghosts[i].index = remote_elem + remote_count; 876 i++; 877 if (i == num_ghosts) break; 878 z = vert_z[owned_verts + i]; 879 } 880 if (IjkActive(layout.vextent, loc)) remote_count++; 881 else rz += ZStepOct(rz); 882 } 883 } 884 PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER)); 885 PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF")); 886 PetscCall(DMSetPointSF(dm, sf)); 887 PetscCall(PetscSFDestroy(&sf)); 888 } 889 { 890 Vec coordinates; 891 PetscScalar *coords; 892 PetscSection coord_section; 893 PetscInt coord_size; 894 PetscCall(DMGetCoordinateSection(dm, &coord_section)); 895 PetscCall(PetscSectionSetNumFields(coord_section, 1)); 896 PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim)); 897 PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts)); 898 for (PetscInt v = 0; v < local_verts; v++) { 899 PetscInt point = local_elems + v; 900 PetscCall(PetscSectionSetDof(coord_section, point, dim)); 901 PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim)); 902 } 903 PetscCall(PetscSectionSetUp(coord_section)); 904 PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size)); 905 PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates)); 906 PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates")); 907 PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE)); 908 PetscCall(VecSetBlockSize(coordinates, dim)); 909 PetscCall(VecSetType(coordinates, VECSTANDARD)); 910 PetscCall(VecGetArray(coordinates, &coords)); 911 for (PetscInt v = 0; v < local_verts; v++) { 912 Ijk loc = ZCodeSplit(vert_z[v]); 913 coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i; 914 if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j; 915 if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k; 916 } 917 PetscCall(VecRestoreArray(coordinates, &coords)); 918 PetscCall(DMSetCoordinatesLocal(dm, coordinates)); 919 PetscCall(VecDestroy(&coordinates)); 920 } 921 if (interpolate) { 922 PetscCall(DMPlexInterpolateInPlace_Internal(dm)); 923 // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot 924 // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the 925 // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make 926 // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might 927 // be needed in a general CGNS reader, for example. 928 PetscCall(DMPlexOrientPositiveEdges_Private(dm)); 929 930 DMLabel label; 931 PetscCall(DMCreateLabel(dm, "Face Sets")); 932 PetscCall(DMGetLabel(dm, "Face Sets", &label)); 933 PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3]; 934 for (PetscInt i = 0; i < 3; i++) { 935 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i])); 936 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i])); 937 PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i])); 938 } 939 PetscInt fStart, fEnd, vStart, vEnd; 940 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 941 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 942 for (PetscInt f = fStart; f < fEnd; f++) { 943 PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8]; 944 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 945 PetscInt bc_count[6] = {0}; 946 for (PetscInt i = 0; i < npoints; i++) { 947 PetscInt p = points[2 * i]; 948 if (p < vStart || vEnd <= p) continue; 949 fverts[num_fverts++] = p; 950 Ijk loc = ZCodeSplit(vert_z[p - vStart]); 951 // Convention here matches DMPlexCreateCubeMesh_Internal 952 bc_count[0] += loc.i == 0; 953 bc_count[1] += loc.i == layout.vextent.i - 1; 954 bc_count[2] += loc.j == 0; 955 bc_count[3] += loc.j == layout.vextent.j - 1; 956 bc_count[4] += loc.k == 0; 957 bc_count[5] += loc.k == layout.vextent.k - 1; 958 } 959 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 960 for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) { 961 if (bc_count[bc] == PetscPowInt(2, dim - 1)) { 962 PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc])); 963 if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) { 964 PetscInt *put; 965 if (bc % 2 == 0) { // donor face; no label 966 PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put)); 967 *put = f; 968 } else { // periodic face 969 PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put)); 970 *put = f; 971 ZCode *zput; 972 PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput)); 973 for (PetscInt i = 0; i < num_fverts; i++) { 974 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]); 975 switch (bc / 2) { 976 case 0: 977 loc.i = 0; 978 break; 979 case 1: 980 loc.j = 0; 981 break; 982 case 2: 983 loc.k = 0; 984 break; 985 } 986 *zput++ = ZEncode(loc); 987 } 988 } 989 continue; 990 } 991 PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets"); 992 PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc])); 993 bc_match++; 994 } 995 } 996 } 997 // Ensure that the Coordinate DM has our new boundary labels 998 DM cdm; 999 PetscCall(DMGetCoordinateDM(dm, &cdm)); 1000 PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL)); 1001 if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) { 1002 PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces)); 1003 } 1004 for (PetscInt i = 0; i < 3; i++) { 1005 PetscCall(PetscSegBufferDestroy(&per_faces[i])); 1006 PetscCall(PetscSegBufferDestroy(&donor_face_closure[i])); 1007 PetscCall(PetscSegBufferDestroy(&my_donor_faces[i])); 1008 } 1009 } 1010 PetscCall(PetscFree(layout.zstarts)); 1011 PetscCall(PetscFree(vert_z)); 1012 PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 /*@ 1017 DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh 1018 1019 Logically Collective 1020 1021 Input Parameters: 1022 + dm - The `DMPLEX` on which to set periodicity 1023 . num_face_sfs - Number of `PetscSF`s in `face_sfs` 1024 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 1025 1026 Level: advanced 1027 1028 Note: 1029 One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally 1030 and locally, but are paired when creating a global dof space. 1031 1032 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()` 1033 @*/ 1034 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs) 1035 { 1036 DM_Plex *plex = (DM_Plex *)dm->data; 1037 1038 PetscFunctionBegin; 1039 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1040 if (num_face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 1041 if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 1042 1043 for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i])); 1044 1045 if (plex->periodic.num_face_sfs > 0) { 1046 for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i])); 1047 PetscCall(PetscFree(plex->periodic.face_sfs)); 1048 } 1049 1050 plex->periodic.num_face_sfs = num_face_sfs; 1051 PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs)); 1052 for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i]; 1053 1054 DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one 1055 if (cdm) { 1056 PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs)); 1057 if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal; 1058 } 1059 PetscFunctionReturn(PETSC_SUCCESS); 1060 } 1061 1062 /*@C 1063 DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh 1064 1065 Logically Collective 1066 1067 Input Parameter: 1068 . dm - The `DMPLEX` for which to obtain periodic relation 1069 1070 Output Parameters: 1071 + num_face_sfs - Number of `PetscSF`s in the array 1072 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 1073 1074 Level: advanced 1075 1076 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1077 @*/ 1078 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs) 1079 { 1080 DM_Plex *plex = (DM_Plex *)dm->data; 1081 1082 PetscFunctionBegin; 1083 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1084 *face_sfs = plex->periodic.face_sfs; 1085 *num_face_sfs = plex->periodic.num_face_sfs; 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 /*@C 1090 DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points 1091 1092 Logically Collective 1093 1094 Input Parameters: 1095 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()` 1096 . n - Number of transforms in array 1097 - t - Array of 4x4 affine transformation basis. 1098 1099 Level: advanced 1100 1101 Notes: 1102 Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation), 1103 the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always 1104 be 1. This representation is common in geometric modeling and allows affine transformations to be composed using 1105 simple matrix multiplication. 1106 1107 Although the interface accepts a general affine transform, only affine translation is supported at present. 1108 1109 Developer Notes: 1110 This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and 1111 adding GPU implementations to apply the G2L/L2G transforms. 1112 1113 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1114 @*/ 1115 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[]) 1116 { 1117 DM_Plex *plex = (DM_Plex *)dm->data; 1118 1119 PetscFunctionBegin; 1120 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1121 PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs); 1122 1123 PetscCall(PetscFree(plex->periodic.transform)); 1124 PetscCall(PetscMalloc1(n, &plex->periodic.transform)); 1125 for (PetscInt i = 0; i < n; i++) { 1126 for (PetscInt j = 0; j < 4; j++) { 1127 for (PetscInt k = 0; k < 4; k++) { 1128 PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported"); 1129 plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k]; 1130 } 1131 } 1132 } 1133 PetscFunctionReturn(PETSC_SUCCESS); 1134 } 1135