#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/sectionimpl.h>

/*@
  PetscSFSetGraphLayout - Set a `PetscSF` communication pattern using global indices and a `PetscLayout`

  Collective

  Input Parameters:
+ sf        - star forest
. layout    - `PetscLayout` defining the global space for roots, i.e. which roots are owned by each MPI process
. nleaves   - number of leaf vertices on the current process, each of these references a root on any MPI process
. ilocal    - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage, that is, the locations are in [0, `nleaves`)
. localmode - copy mode for `ilocal`
- gremote   - root vertices in global numbering corresponding to the leaves

  Level: intermediate

  Note:
  Global indices must lie in [0, N) where N is the global size of `layout`.
  Leaf indices in `ilocal` get sorted; this means the user-provided array gets sorted if `localmode` is `PETSC_OWN_POINTER`.

  Developer Notes:
  Local indices which are the identity permutation in the range [0, `nleaves`) are discarded as they
  encode contiguous storage. In that case, if `localmode` is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
  needed.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, const PetscInt gremote[])
{
  const PetscInt *range;
  PetscInt        i, nroots, ls = -1, ln = -1;
  PetscMPIInt     lr = -1;
  PetscSFNode    *remote;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
  PetscCall(PetscLayoutGetRanges(layout, &range));
  PetscCall(PetscMalloc1(nleaves, &remote));
  if (nleaves) ls = gremote[0] + 1;
  for (i = 0; i < nleaves; i++) {
    const PetscInt idx = gremote[i] - ls;
    if (idx < 0 || idx >= ln) { /* short-circuit the search */
      PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
      remote[i].rank = lr;
      ls             = range[lr];
      ln             = range[lr + 1] - ls;
    } else {
      remote[i].rank  = lr;
      remote[i].index = idx;
    }
  }
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe a `PetscSF`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ layout  - `PetscLayout` defining the global space for roots
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal  - locations of leaves in leafdata buffers, or `NULL` for contiguous storage
- gremote - root vertices in global numbering corresponding to the leaves

  Level: intermediate

  Notes:
  The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest.
  The outputs `layout` and `gremote` are freshly created each time this function is called,
  so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user.
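
  Example Usage:
  A minimal sketch of the round trip described above, given an existing `sf` (the variable names are illustrative;
  the cast is safe here because `PETSC_COPY_VALUES` leaves the queried `ilocal` untouched):
.vb
  PetscLayout     layout;
  PetscInt        nleaves;
  const PetscInt *ilocal;
  PetscInt       *gremote;
  PetscSF         sfdup;

  PetscCall(PetscSFGetGraphLayout(sf, &layout, &nleaves, &ilocal, &gremote));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &sfdup));
  /* sfdup now encodes the same star forest as sf */
  PetscCall(PetscSFSetGraphLayout(sfdup, layout, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, gremote));
  /* the freshly created outputs must be freed by the caller */
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscFree(gremote));
  PetscCall(PetscSFDestroy(&sfdup));
.ve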

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[])
{
  PetscInt           nr, nl;
  const PetscSFNode *ir;
  PetscLayout        lt;

  PetscFunctionBegin;
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, &lt));
  if (gremote) {
    PetscInt        i;
    const PetscInt *range;
    PetscInt       *gr;

    PetscCall(PetscLayoutGetRanges(lt, &range));
    PetscCall(PetscMalloc1(nl, &gr));
    for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index;
    *gremote = gr;
  }
  if (nleaves) *nleaves = nl;
  if (layout) *layout = lt;
  else PetscCall(PetscLayoutDestroy(&lt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraphSection - Sets the `PetscSF` graph (communication pattern) encoding the parallel dof overlap based upon the `PetscSection` describing the data layout.

  Input Parameters:
+ sf            - The `PetscSF`
. localSection  - `PetscSection` describing the local data layout
- globalSection - `PetscSection` describing the global data layout

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2);
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or `NULL`; its length will be the size of the chart of `leafSection`
- leafSection   - Section defined on the leaf space

  Level: advanced

  Notes:
  Caller must `PetscFree()` `remoteOffsets` if it was requested.

  To distribute data from the `rootSection` to the `leafSection`, see `PetscSFCreateSectionSF()` or `PetscSectionMigrateData()`.

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.
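
  Example Usage:
  A sketch of the typical pattern that pairs this routine with `PetscSFCreateSectionSF()` to move the actual data;
  the `sf`, `rootSection`, and `rootData` (a `PetscScalar` array laid out by `rootSection`) variables are assumed
  to exist already:
.vb
  PetscSection leafSection;
  PetscInt    *remoteOffsets, n;
  PetscSF      sectionSF;
  PetscScalar *leafData;

  PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)sf), &leafSection));
  PetscCall(PetscSFDistributeSection(sf, rootSection, &remoteOffsets, leafSection));
  PetscCall(PetscSFCreateSectionSF(sf, rootSection, remoteOffsets, leafSection, &sectionSF));
  PetscCall(PetscFree(remoteOffsets));
  /* move the point data from the root layout to the leaf layout */
  PetscCall(PetscSectionGetStorageSize(leafSection, &n));
  PetscCall(PetscMalloc1(n, &leafData));
  PetscCall(PetscSFBcastBegin(sectionSF, MPIU_SCALAR, rootData, leafData, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sectionSF, MPIU_SCALAR, rootData, leafData, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sectionSF));
.ve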

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFCreateSectionSF()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt *remoteOffsets[], PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++;

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
. rootSection - Data layout of remote points for outgoing data (this is layout for roots)
- leafSection - Data layout of local points for incoming data (this is layout for leaves)

  Output Parameter:
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`

  Level: developer

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt *remoteOffsets[])
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;

  PetscFunctionBegin;
  *remoteOffsets = NULL;
  PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
  PetscCall(ISGetIndices(selected, &indices));
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
  PetscCall(ISRestoreIndices(selected, &indices));
  PetscCall(ISDestroy(&selected));
  PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points

  Collective

  Input Parameters:
+ sf            - The `PetscSF`
. rootSection   - Data layout of remote points for outgoing data (this is usually the serial section)
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`
- leafSection   - Data layout of local points for incoming data (this is the distributed section)

  Output Parameter:
. sectionSF - The new `PetscSF`

  Level: advanced

  Notes:
  `remoteOffsets` can be `NULL` if `sf` does not reference any points in `leafSection`

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFDistributeSection()`
@*/
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
{
  MPI_Comm           comm;
  const PetscInt    *localPoints;
  const PetscSFNode *remotePoints;
  PetscInt           lpStart, lpEnd;
  PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, ind;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rootSection, 2);
  /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
  PetscAssertPointer(leafSection, 4);
  PetscAssertPointer(sectionSF, 5);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFCreate(comm, sectionSF));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
  PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
  for (i = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt dof;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      numIndices += dof < 0 ? 0 : dof;
    }
  }
  PetscCall(PetscMalloc1(numIndices, &localIndices));
  PetscCall(PetscMalloc1(numIndices, &remoteIndices));
  /* Create new index graph */
  for (i = 0, ind = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt rank       = remotePoints[i].rank;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
      PetscInt loff, dof, d;

      PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = loff + d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset + d;
      }
    }
  }
  PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
  PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sectionSF));
  PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateFromLayouts - Creates a parallel star forest mapping between two `PetscLayout` objects

  Collective

  Input Parameters:
+ rmap - `PetscLayout` defining the global root space
- lmap - `PetscLayout` defining the global leaf space

  Output Parameter:
. sf - The parallel star forest

  Level: intermediate

  Notes:
  If the global length of `lmap` differs from the global length of `rmap` then the excess entries are ignored.
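
  Example Usage:
  A minimal two-process sketch that builds such an `sf` (the local sizes 3/2 and 2/4 are illustrative and correspond
  to the example in the next paragraph; `comm` and `rank` are assumed to be set up by the caller):
.vb
  PetscLayout rmap, lmap;
  PetscSF     sf;

  PetscCall(PetscLayoutCreate(comm, &rmap));
  PetscCall(PetscLayoutSetLocalSize(rmap, rank == 0 ? 3 : 2));
  PetscCall(PetscLayoutSetUp(rmap));
  PetscCall(PetscLayoutCreate(comm, &lmap));
  PetscCall(PetscLayoutSetLocalSize(lmap, rank == 0 ? 2 : 4));
  PetscCall(PetscLayoutSetUp(lmap));
  PetscCall(PetscSFCreateFromLayouts(rmap, lmap, &sf));
  /* the layouts are no longer needed once the graph has been built */
  PetscCall(PetscLayoutDestroy(&rmap));
  PetscCall(PetscLayoutDestroy(&lmap));
.ve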

  The resulting `sf` used with `PetscSFBcastBegin()` and `PetscSFBcastEnd()` merely copies the array entries of `rootdata` to
  `leafdata`, moving them between MPI processes if needed. For example,
  if rmap is [0, 3, 5) and lmap is [0, 2, 6) and `rootdata` is (1, 2, 3) on MPI rank 0 and (4, 5) on MPI rank 1 then the
  `leafdata` would become (1, 2) on MPI rank 0 and (3, 4, 5, x) on MPI rank 1.

.seealso: [](sec_petscsf), `PetscSF`, `PetscLayout`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
{
  PetscInt     i, nroots, nleaves = 0;
  PetscInt     rN, lst, len;
  PetscMPIInt  owner = -1;
  PetscSFNode *remote;
  MPI_Comm     rcomm = rmap->comm;
  MPI_Comm     lcomm = lmap->comm;
  PetscMPIInt  flg;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 3);
  PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
  PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
  PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
  PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
  PetscCall(PetscSFCreate(rcomm, sf));
  PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
  PetscCall(PetscLayoutGetSize(rmap, &rN));
  PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
  PetscCall(PetscMalloc1(len - lst, &remote));
  for (i = lst; i < len && i < rN; i++) {
    if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
    remote[nleaves].rank  = owner;
    remote[nleaves].index = i - rmap->range[owner];
    nleaves++;
  }
  PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
  PetscCall(PetscFree(remote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt *oidxs[], PetscInt *ogidxs[])
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL, *ilocal;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0, nleaves = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1;
  PetscCall(PetscMalloc1(N, &ridxs));
  PetscCall(PetscMalloc1(N, &ilocal));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];

    if (idx < 0) continue;
    PetscCheck(idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[nleaves].rank  = p;
    ridxs[nleaves].index = idxs[r] - owners[p];
    ilocal[nleaves]      = r;
    nleaves++;
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, nleaves, ilocal, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum;
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;
  if (ogidxs) *ogidxs = work;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices

  Collective

  Input Parameters:
+ layout           - `PetscLayout` defining the global index space and the MPI rank that brokers each index
. numRootIndices   - size of `rootIndices`
. rootIndices      - array of global indices of which this process requests ownership
. rootLocalIndices - root local index permutation (`NULL` if no permutation)
. rootLocalOffset  - offset to be added to `rootLocalIndices`
. numLeafIndices   - size of `leafIndices`
. leafIndices      - array of global indices with which this process requires data associated
. leafLocalIndices - leaf local index permutation (`NULL` if no permutation)
- leafLocalOffset  - offset to be added to `leafLocalIndices`

  Output Parameters:
+ sfA - star forest representing the communication pattern from the layout space to the leaf space (`NULL` if not needed)
- sf  - star forest representing the communication pattern from the root space to the leaf space

  Level: advanced

  Example 1:
.vb
  rank            : 0       1     2
  rootIndices     : [1 0 2] [3]   [3]
  rootLocalOffset : 100     200   300
  layout          : [0 1]   [2]   [3]
  leafIndices     : [0]     [2]   [0 3]
  leafLocalOffset : 400     500   600

would build the following PetscSF

  [0] 400 <- (0,101)
  [1] 500 <- (0,102)
  [2] 600 <- (0,101)
  [2] 601 <- (2,300)
.ve

  Example 2:
.vb
  rank            : 0               1               2
  rootIndices     : [1 0 2]         [3]             [3]
  rootLocalOffset : 100             200             300
  layout          : [0 1]           [2]             [3]
  leafIndices     : rootIndices     rootIndices     rootIndices
  leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset

would build the following PetscSF

  [1] 200 <- (2,300)
.ve

  Example 3:
.vb
  No process requests ownership of global index 1, but no process needs it.

  rank            : 0     1     2
  numRootIndices  : 2     1     1
  rootIndices     : [0 2] [3]   [3]
  rootLocalOffset : 100   200   300
  layout          : [0 1] [2]   [3]
  numLeafIndices  : 1     1     2
  leafIndices     : [0]   [2]   [0 3]
  leafLocalOffset : 400   500   600

would build the following PetscSF

  [0] 400 <- (0,100)
  [1] 500 <- (0,101)
  [2] 600 <- (0,100)
  [2] 601 <- (2,300)
.ve

  Notes:
  `layout` represents any partitioning of [0, N), where N is the total number of global indices, and its
  local size can be set to `PETSC_DECIDE`.
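
  For instance, a call realizing Example 1 above might look like the following on rank 0 (a sketch: ranks 1 and 2
  pass their own arrays from the table, `comm` is the three-process communicator of `layout`, and `PETSC_DECIDE`
  lets the layout pick the local sizes shown above):
.vb
  const PetscInt rootIndices[] = {1, 0, 2};
  const PetscInt leafIndices[] = {0};
  PetscLayout    layout;
  PetscSF        sf;

  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetSize(layout, 4));
  PetscCall(PetscLayoutSetLocalSize(layout, PETSC_DECIDE));
  PetscCall(PetscSFCreateByMatchingIndices(layout, 3, rootIndices, NULL, 100, 1, leafIndices, NULL, 400, NULL, &sf));
  PetscCall(PetscLayoutDestroy(&layout));
.ve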

  If a global index x lies in the partition owned by process i, each process whose `rootIndices` contains x requests
  ownership of x and sends its own rank and the local index of x to process i.
  If multiple processes request ownership of x, the one with the highest rank is to own x.
  Process i then broadcasts the ownership information, so that each process whose `leafIndices` contains x knows the
  ownership information of x.
  The output `sf` is constructed by associating each leaf point to a root point in this way.

  Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
  The optional output `sfA` can be used to push such data to leaf points.

  All indices in `rootIndices` and `leafIndices` must lie in the layout range. The union (over all processes) of `rootIndices`
  must cover that of `leafIndices`, but need not cover the entire layout.

  If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
  star forest is almost the identity, so it will only include the non-trivial part of the map.

  Developer Notes:
  The current approach, in which the process with the highest rank gains ownership, may cause load imbalance; consider using
  hash(rank, root_local_index) as the bid for the ownership determination.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt rootIndices[], const PetscInt rootLocalIndices[], PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt leafIndices[], const PetscInt leafLocalIndices[], PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf)
{
  MPI_Comm     comm = layout->comm;
  PetscMPIInt  rank;
  PetscSF      sf1;
  PetscSFNode *owners, *buffer, *iremote;
  PetscInt    *ilocal, nleaves, N, n, i;
  PetscBool    areIndicesSame;

  PetscFunctionBegin;
  if (rootIndices) PetscAssertPointer(rootIndices, 3);
  if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4);
  if (leafIndices) PetscAssertPointer(leafIndices, 7);
  if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8);
  if (sfA) PetscAssertPointer(sfA, 10);
  PetscAssertPointer(sf, 11);
  PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
  PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetSize(layout, &N));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  areIndicesSame = (PetscBool)(leafIndices == rootIndices);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &areIndicesSame, 1, MPI_C_BOOL, MPI_LAND, comm));
  PetscCheck(!areIndicesSame || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices (%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
  if (PetscDefined(USE_DEBUG)) {
    PetscInt N1 = PETSC_INT_MIN;
    for (i = 0; i < numRootIndices; i++)
      if (rootIndices[i] > N1) N1 = rootIndices[i];
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
    PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 741 if (!areIndicesSame) { 742 N1 = PETSC_INT_MIN; 743 for (i = 0; i < numLeafIndices; i++) 744 if (leafIndices[i] > N1) N1 = leafIndices[i]; 745 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 746 PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 747 } 748 } 749 750 /* Reduce: owners -> buffer */ 751 PetscCall(PetscMalloc1(n, &buffer)); 752 PetscCall(PetscSFCreate(comm, &sf1)); 753 PetscCall(PetscSFSetFromOptions(sf1)); 754 PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices)); 755 PetscCall(PetscMalloc1(numRootIndices, &owners)); 756 for (i = 0; i < numRootIndices; ++i) { 757 owners[i].rank = rank; 758 owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i); 759 } 760 for (i = 0; i < n; ++i) { 761 buffer[i].index = -1; 762 buffer[i].rank = -1; 763 } 764 PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC)); 765 PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC)); 766 /* Bcast: buffer -> owners */ 767 if (!areIndicesSame) { 768 PetscCall(PetscFree(owners)); 769 PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices)); 770 PetscCall(PetscMalloc1(numLeafIndices, &owners)); 771 } 772 PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE)); 773 PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE)); 774 for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]); 775 PetscCall(PetscFree(buffer)); 776 if (sfA) { 777 *sfA = sf1; 778 } else PetscCall(PetscSFDestroy(&sf1)); 779 /* Create sf */ 780 if (areIndicesSame && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) { 781 /* leaf space == root space */ 782 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) 783 if (owners[i].rank != rank) ++nleaves; 784 PetscCall(PetscMalloc1(nleaves, &ilocal)); 785 PetscCall(PetscMalloc1(nleaves, &iremote)); 786 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) { 787 if (owners[i].rank != rank) { 788 ilocal[nleaves] = leafLocalOffset + i; 789 iremote[nleaves].rank = owners[i].rank; 790 iremote[nleaves].index = owners[i].index; 791 ++nleaves; 792 } 793 } 794 PetscCall(PetscFree(owners)); 795 } else { 796 nleaves = numLeafIndices; 797 PetscCall(PetscMalloc1(nleaves, &ilocal)); 798 for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i); 799 iremote = owners; 800 } 801 PetscCall(PetscSFCreate(comm, sf)); 802 PetscCall(PetscSFSetFromOptions(*sf)); 803 PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 804 PetscFunctionReturn(PETSC_SUCCESS); 805 } 806 807 /*@ 808 PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb` 809 810 Collective 811 812 Input Parameters: 813 + sfa - default `PetscSF` 814 - sfb - additional edges to add/replace edges in `sfa` 815 816 Output Parameter: 817 . 

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`
@*/
PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged)
{
  PetscInt maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2);
  PetscCheckSameComm(sfa, 1, sfb, 2);
  PetscAssertPointer(merged, 3);
  {
    PetscInt aleaf, bleaf;
    PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf));
    PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf));
    maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index
  }
  PetscInt          *clocal, aroots, aleaves, broots, bleaves;
  PetscSFNode       *cremote;
  const PetscInt    *alocal, *blocal;
  const PetscSFNode *aremote, *bremote;
  PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote));
  for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1;
  PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote));
  PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote));
  PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space");
  for (PetscInt i = 0; i < aleaves; i++) {
    PetscInt a = alocal ? alocal[i] : i;
    clocal[a]  = a;
    cremote[a] = aremote[i];
  }
  for (PetscInt i = 0; i < bleaves; i++) {
    PetscInt b = blocal ? blocal[i] : i;
    clocal[b]  = b;
    cremote[b] = bremote[i];
  }
  PetscInt nleaves = 0;
  for (PetscInt i = 0; i < maxleaf; i++) {
    if (clocal[i] < 0) continue;
    clocal[nleaves]  = clocal[i];
    cremote[nleaves] = cremote[i];
    nleaves++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged));
  PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(clocal, cremote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateStridedSF - Create a `PetscSF` to communicate interleaved blocks of data

  Collective

  Input Parameters:
+ sf  - star forest
. bs  - stride
. ldr - leading dimension of root space
- ldl - leading dimension of leaf space

  Output Parameter:
. vsf - the new `PetscSF`

  Level: intermediate

  Notes:
  This can be useful to perform communications on multiple right-hand sides stored in a Fortran-style two-dimensional array.
  For example, the calling sequence
.vb
  c_datatype *roots, *leaves;
  for i in [0,bs) do
    PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
    PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
.ve
  is equivalent to
.vb
  c_datatype *roots, *leaves;
  PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf)
  PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op)
  PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op)
.ve

  Developer Notes:
  Should this functionality be handled with a new API instead of creating a new object?
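
  Example Usage:
  A concrete sketch of the equivalence above for two right-hand sides of `PetscScalar` data (the `roots` and `leaves`
  arrays and the leading dimensions `ldr` and `ldl` are assumed to be allocated consistently with `sf`):
.vb
  PetscSF      vsf;
  PetscScalar *roots, *leaves; // sized at least 2*ldr and 2*ldl entries, respectively

  PetscCall(PetscSFCreateStridedSF(sf, 2, ldr, ldl, &vsf));
  PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, roots, leaves, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, roots, leaves, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&vsf));
.ve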

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
{
  PetscSF            rankssf;
  const PetscSFNode *iremote, *sfrremote;
  PetscSFNode       *viremote;
  const PetscInt    *ilocal;
  PetscInt          *vilocal = NULL, *ldrs;
  PetscInt           nranks, nr, nl, vnr, vnl, maxl;
  PetscMPIInt        rank;
  MPI_Comm           comm;
  PetscSFType        sftype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveInt(sf, bs, 2);
  PetscAssertPointer(vsf, 5);
  if (bs == 1) {
    PetscCall(PetscObjectReference((PetscObject)sf));
    *vsf = sf;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
  PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
  maxl += 1;
  if (ldl == PETSC_DECIDE) ldl = maxl;
  if (ldr == PETSC_DECIDE) ldr = nr;
  PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must not be smaller than number of roots %" PetscInt_FMT, ldr, nr);
  PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be larger than leaf range %" PetscInt_FMT, ldl, maxl - 1);
  vnr = nr * bs;
  vnl = nl * bs;
  PetscCall(PetscMalloc1(vnl, &viremote));
  PetscCall(PetscMalloc1(vnl, &vilocal));

  /* Communicate root leading dimensions to leaf ranks */
  PetscCall(PetscSFGetRanksSF(sf, &rankssf));
  PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote));
  PetscCall(PetscMalloc1(nranks, &ldrs));
  PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));

  for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) {
    const PetscInt r  = iremote[i].rank;
    const PetscInt ii = iremote[i].index;

    if (r == rank) lda = ldr;
    else if (rold != r) {
      PetscInt j;

      for (j = 0; j < nranks; j++)
        if (sfrremote[j].rank == r) break;
      PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
      lda = ldrs[j];
    }
    rold = r;
    for (PetscInt v = 0; v < bs; v++) {
      viremote[v * nl + i].rank  = r;
      viremote[v * nl + i].index = v * lda + ii;
      vilocal[v * nl + i]        = v * ldl + (ilocal ? ilocal[i] : i);
    }
  }
  PetscCall(PetscFree(ldrs));
  PetscCall(PetscSFCreate(comm, vsf));
  PetscCall(PetscSFGetType(sf, &sftype));
  PetscCall(PetscSFSetType(*vsf, sftype));
  PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}