xref: /petsc/src/vec/is/sf/interface/sf.c (revision cf84bf9e8334fcb41ddd5b57d5020f9ba05506ed)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h>
595fce210SBarry Smith 
67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
7715b587bSJunchao Zhang   #include <petscdevice_cuda.h>
87fd2d3dbSJunchao Zhang #endif
97fd2d3dbSJunchao Zhang 
107fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
117fd2d3dbSJunchao Zhang   #include <hip/hip_runtime.h>
127fd2d3dbSJunchao Zhang #endif
137fd2d3dbSJunchao Zhang 
142abc8c78SJacob Faibussowitsch #if defined(PETSC_CLANG_STATIC_ANALYZER)
154bf303faSJacob Faibussowitsch extern void PetscSFCheckGraphSet(PetscSF, int);
162abc8c78SJacob Faibussowitsch #else
1795fce210SBarry Smith   #if defined(PETSC_USE_DEBUG)
18a8f51744SPierre Jolivet     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
1995fce210SBarry Smith   #else
209371c9d4SSatish Balay     #define PetscSFCheckGraphSet(sf, arg) \
219371c9d4SSatish Balay       do { \
229371c9d4SSatish Balay       } while (0)
2395fce210SBarry Smith   #endif
242abc8c78SJacob Faibussowitsch #endif
2595fce210SBarry Smith 
264c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
271f40158dSVaclav Hapla const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
2895fce210SBarry Smith 
298af6ec1cSBarry Smith /*@
3095fce210SBarry Smith   PetscSFCreate - create a star forest communication context
3195fce210SBarry Smith 
32d083f849SBarry Smith   Collective
3395fce210SBarry Smith 
344165533cSJose E. Roman   Input Parameter:
3595fce210SBarry Smith . comm - communicator on which the star forest will operate
3695fce210SBarry Smith 
374165533cSJose E. Roman   Output Parameter:
3895fce210SBarry Smith . sf - new star forest context
3995fce210SBarry Smith 
4020662ed9SBarry Smith   Options Database Key:
416677b1c1SJunchao Zhang + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
426677b1c1SJunchao Zhang . -sf_type window                - Use MPI-3 one-sided window for communication
436677b1c1SJunchao Zhang . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
446677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
45dd5b3ca6SJunchao Zhang 
4695fce210SBarry Smith   Level: intermediate
4795fce210SBarry Smith 
48cab54364SBarry Smith   Note:
49cab54364SBarry Smith   When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
50cab54364SBarry Smith   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
5120662ed9SBarry Smith   `SF`s are optimized and they have better performance than the general `SF`s.
52dd5b3ca6SJunchao Zhang 
5338b5cf2dSJacob Faibussowitsch .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
5495fce210SBarry Smith @*/
55d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
56d71ae5a4SJacob Faibussowitsch {
5795fce210SBarry Smith   PetscSF b;
5895fce210SBarry Smith 
5995fce210SBarry Smith   PetscFunctionBegin;
604f572ea9SToby Isaac   PetscAssertPointer(sf, 2);
619566063dSJacob Faibussowitsch   PetscCall(PetscSFInitializePackage());
6295fce210SBarry Smith 
639566063dSJacob Faibussowitsch   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
6495fce210SBarry Smith   b->nroots    = -1;
6595fce210SBarry Smith   b->nleaves   = -1;
661690c2aeSBarry Smith   b->minleaf   = PETSC_INT_MAX;
671690c2aeSBarry Smith   b->maxleaf   = PETSC_INT_MIN;
6895fce210SBarry Smith   b->nranks    = -1;
6995fce210SBarry Smith   b->rankorder = PETSC_TRUE;
7095fce210SBarry Smith   b->ingroup   = MPI_GROUP_NULL;
7195fce210SBarry Smith   b->outgroup  = MPI_GROUP_NULL;
7295fce210SBarry Smith   b->graphset  = PETSC_FALSE;
7320c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
7420c24465SJunchao Zhang   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
7520c24465SJunchao Zhang   b->use_stream_aware_mpi = PETSC_FALSE;
7671438e86SJunchao Zhang   b->unknown_input_stream = PETSC_FALSE;
7727f636e8SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
7820c24465SJunchao Zhang   b->backend = PETSCSF_BACKEND_KOKKOS;
7927f636e8SJunchao Zhang   #elif defined(PETSC_HAVE_CUDA)
8027f636e8SJunchao Zhang   b->backend = PETSCSF_BACKEND_CUDA;
8159af0bd3SScott Kruger   #elif defined(PETSC_HAVE_HIP)
8259af0bd3SScott Kruger   b->backend = PETSCSF_BACKEND_HIP;
8320c24465SJunchao Zhang   #endif
8471438e86SJunchao Zhang 
8571438e86SJunchao Zhang   #if defined(PETSC_HAVE_NVSHMEM)
8671438e86SJunchao Zhang   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
8771438e86SJunchao Zhang   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
889566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
899566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
9071438e86SJunchao Zhang   #endif
9120c24465SJunchao Zhang #endif
9260c22052SBarry Smith   b->vscat.from_n = -1;
9360c22052SBarry Smith   b->vscat.to_n   = -1;
9460c22052SBarry Smith   b->vscat.unit   = MPIU_SCALAR;
9595fce210SBarry Smith   *sf             = b;
963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9795fce210SBarry Smith }
9895fce210SBarry Smith 
9929046d53SLisandro Dalcin /*@
10095fce210SBarry Smith   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
10195fce210SBarry Smith 
10295fce210SBarry Smith   Collective
10395fce210SBarry Smith 
1044165533cSJose E. Roman   Input Parameter:
10595fce210SBarry Smith . sf - star forest
10695fce210SBarry Smith 
10795fce210SBarry Smith   Level: advanced
10895fce210SBarry Smith 
109cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
11095fce210SBarry Smith @*/
111d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf)
112d71ae5a4SJacob Faibussowitsch {
11395fce210SBarry Smith   PetscFunctionBegin;
11495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
115dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Reset);
1160dd791a8SStefano Zampini   PetscCall(PetscSFDestroy(&sf->rankssf));
1170dd791a8SStefano Zampini 
11829046d53SLisandro Dalcin   sf->nroots   = -1;
11929046d53SLisandro Dalcin   sf->nleaves  = -1;
1201690c2aeSBarry Smith   sf->minleaf  = PETSC_INT_MAX;
1211690c2aeSBarry Smith   sf->maxleaf  = PETSC_INT_MIN;
12295fce210SBarry Smith   sf->mine     = NULL;
12395fce210SBarry Smith   sf->remote   = NULL;
12429046d53SLisandro Dalcin   sf->graphset = PETSC_FALSE;
1259566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->mine_alloc));
1269566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->remote_alloc));
12721c688dcSJed Brown   sf->nranks = -1;
1289566063dSJacob Faibussowitsch   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
12929046d53SLisandro Dalcin   sf->degreeknown = PETSC_FALSE;
1309566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->degree));
1319566063dSJacob Faibussowitsch   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
1329566063dSJacob Faibussowitsch   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
1330dd791a8SStefano Zampini 
134013b3241SStefano Zampini   if (sf->multi) sf->multi->multi = NULL;
1359566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf->multi));
1360dd791a8SStefano Zampini 
1379566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sf->map));
13871438e86SJunchao Zhang 
13971438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
1409566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
14171438e86SJunchao Zhang #endif
14271438e86SJunchao Zhang 
14395fce210SBarry Smith   sf->setupcalled = PETSC_FALSE;
1443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14595fce210SBarry Smith }
14695fce210SBarry Smith 
147cc4c1da9SBarry Smith /*@
148cab54364SBarry Smith   PetscSFSetType - Set the `PetscSF` communication implementation
14995fce210SBarry Smith 
150c3339decSBarry Smith   Collective
15195fce210SBarry Smith 
15295fce210SBarry Smith   Input Parameters:
153cab54364SBarry Smith + sf   - the `PetscSF` context
15495fce210SBarry Smith - type - a known method
155cab54364SBarry Smith .vb
156cab54364SBarry Smith     PETSCSFWINDOW - MPI-2/3 one-sided
157cab54364SBarry Smith     PETSCSFBASIC - basic implementation using MPI-1 two-sided
158cab54364SBarry Smith .ve
15995fce210SBarry Smith 
16095fce210SBarry Smith   Options Database Key:
16120662ed9SBarry Smith . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
162cab54364SBarry Smith 
163cab54364SBarry Smith   Level: intermediate
16495fce210SBarry Smith 
16595fce210SBarry Smith   Notes:
16620662ed9SBarry Smith   See `PetscSFType` for possible values
16795fce210SBarry Smith 
16820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
16995fce210SBarry Smith @*/
170d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171d71ae5a4SJacob Faibussowitsch {
17295fce210SBarry Smith   PetscBool match;
1735f80ce2aSJacob Faibussowitsch   PetscErrorCode (*r)(PetscSF);
17495fce210SBarry Smith 
17595fce210SBarry Smith   PetscFunctionBegin;
17695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1774f572ea9SToby Isaac   PetscAssertPointer(type, 2);
17895fce210SBarry Smith 
1799566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
1803ba16761SJacob Faibussowitsch   if (match) PetscFunctionReturn(PETSC_SUCCESS);
18195fce210SBarry Smith 
1829566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
1836adde796SStefano Zampini   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
18429046d53SLisandro Dalcin   /* Destroy the previous PetscSF implementation context */
185dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Destroy);
1869566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
1879566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
1889566063dSJacob Faibussowitsch   PetscCall((*r)(sf));
1893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19095fce210SBarry Smith }
19195fce210SBarry Smith 
192cc4c1da9SBarry Smith /*@
193cab54364SBarry Smith   PetscSFGetType - Get the `PetscSF` communication implementation
19429046d53SLisandro Dalcin 
19529046d53SLisandro Dalcin   Not Collective
19629046d53SLisandro Dalcin 
19729046d53SLisandro Dalcin   Input Parameter:
198cab54364SBarry Smith . sf - the `PetscSF` context
19929046d53SLisandro Dalcin 
20029046d53SLisandro Dalcin   Output Parameter:
201cab54364SBarry Smith . type - the `PetscSF` type name
20229046d53SLisandro Dalcin 
20329046d53SLisandro Dalcin   Level: intermediate
20429046d53SLisandro Dalcin 
20520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
20629046d53SLisandro Dalcin @*/
207d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208d71ae5a4SJacob Faibussowitsch {
20929046d53SLisandro Dalcin   PetscFunctionBegin;
21029046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2114f572ea9SToby Isaac   PetscAssertPointer(type, 2);
21229046d53SLisandro Dalcin   *type = ((PetscObject)sf)->type_name;
2133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21429046d53SLisandro Dalcin }
21529046d53SLisandro Dalcin 
2160764c050SBarry Smith /*@
21720662ed9SBarry Smith   PetscSFDestroy - destroy a star forest
21895fce210SBarry Smith 
21995fce210SBarry Smith   Collective
22095fce210SBarry Smith 
2214165533cSJose E. Roman   Input Parameter:
22295fce210SBarry Smith . sf - address of star forest
22395fce210SBarry Smith 
22495fce210SBarry Smith   Level: intermediate
22595fce210SBarry Smith 
22620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
22795fce210SBarry Smith @*/
228d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf)
229d71ae5a4SJacob Faibussowitsch {
23095fce210SBarry Smith   PetscFunctionBegin;
2313ba16761SJacob Faibussowitsch   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
232f4f49eeaSPierre Jolivet   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
233f4f49eeaSPierre Jolivet   if (--((PetscObject)*sf)->refct > 0) {
2349371c9d4SSatish Balay     *sf = NULL;
2353ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2369371c9d4SSatish Balay   }
2379566063dSJacob Faibussowitsch   PetscCall(PetscSFReset(*sf));
238f4f49eeaSPierre Jolivet   PetscTryTypeMethod(*sf, Destroy);
2399566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
2409566063dSJacob Faibussowitsch   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242715b587bSJunchao Zhang   if ((*sf)->use_stream_aware_mpi) {
243715b587bSJunchao Zhang     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244715b587bSJunchao Zhang     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245715b587bSJunchao Zhang   }
246715b587bSJunchao Zhang #endif
2479566063dSJacob Faibussowitsch   PetscCall(PetscHeaderDestroy(sf));
2483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24995fce210SBarry Smith }
25095fce210SBarry Smith 
251d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252d71ae5a4SJacob Faibussowitsch {
253c4e6a40aSLawrence Mitchell   PetscInt           i, nleaves;
254c4e6a40aSLawrence Mitchell   PetscMPIInt        size;
255c4e6a40aSLawrence Mitchell   const PetscInt    *ilocal;
256c4e6a40aSLawrence Mitchell   const PetscSFNode *iremote;
257c4e6a40aSLawrence Mitchell 
258c4e6a40aSLawrence Mitchell   PetscFunctionBegin;
2593ba16761SJacob Faibussowitsch   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
2609566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
2619566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262c4e6a40aSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
263c4e6a40aSLawrence Mitchell     const PetscInt rank   = iremote[i].rank;
264c4e6a40aSLawrence Mitchell     const PetscInt remote = iremote[i].index;
265c4e6a40aSLawrence Mitchell     const PetscInt leaf   = ilocal ? ilocal[i] : i;
266c9cc58a2SBarry Smith     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
26708401ef6SPierre Jolivet     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
26808401ef6SPierre Jolivet     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269c4e6a40aSLawrence Mitchell   }
2703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
271c4e6a40aSLawrence Mitchell }
272c4e6a40aSLawrence Mitchell 
27395fce210SBarry Smith /*@
27420662ed9SBarry Smith   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
27595fce210SBarry Smith 
27695fce210SBarry Smith   Collective
27795fce210SBarry Smith 
2784165533cSJose E. Roman   Input Parameter:
27995fce210SBarry Smith . sf - star forest communication object
28095fce210SBarry Smith 
28195fce210SBarry Smith   Level: beginner
28295fce210SBarry Smith 
28320662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
28495fce210SBarry Smith @*/
285d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf)
286d71ae5a4SJacob Faibussowitsch {
28795fce210SBarry Smith   PetscFunctionBegin;
28829046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
28929046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
2903ba16761SJacob Faibussowitsch   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
2919566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
2929566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckGraphValid_Private(sf));
2939566063dSJacob Faibussowitsch   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetUp);
29520c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
29620c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_CUDA) {
29771438e86SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_CUDA;
29871438e86SJunchao Zhang     sf->ops->Free   = PetscSFFree_CUDA;
29920c24465SJunchao Zhang   }
30020c24465SJunchao Zhang #endif
30159af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
30259af0bd3SScott Kruger   if (sf->backend == PETSCSF_BACKEND_HIP) {
30359af0bd3SScott Kruger     sf->ops->Malloc = PetscSFMalloc_HIP;
30459af0bd3SScott Kruger     sf->ops->Free   = PetscSFFree_HIP;
30559af0bd3SScott Kruger   }
30659af0bd3SScott Kruger #endif
30720c24465SJunchao Zhang 
30820c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
30920c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
31020c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Kokkos;
31120c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Kokkos;
31220c24465SJunchao Zhang   }
31320c24465SJunchao Zhang #endif
3149566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
31595fce210SBarry Smith   sf->setupcalled = PETSC_TRUE;
3163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31795fce210SBarry Smith }
31895fce210SBarry Smith 
3198af6ec1cSBarry Smith /*@
320cab54364SBarry Smith   PetscSFSetFromOptions - set `PetscSF` options using the options database
32195fce210SBarry Smith 
32295fce210SBarry Smith   Logically Collective
32395fce210SBarry Smith 
3244165533cSJose E. Roman   Input Parameter:
32595fce210SBarry Smith . sf - star forest
32695fce210SBarry Smith 
32795fce210SBarry Smith   Options Database Keys:
32820662ed9SBarry Smith + -sf_type                      - implementation type, see `PetscSFSetType()`
32951ccb202SJunchao Zhang . -sf_rank_order                - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
33020662ed9SBarry Smith . -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
33120662ed9SBarry Smith                                   use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
33220662ed9SBarry Smith                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
33320662ed9SBarry Smith . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
33420662ed9SBarry Smith                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
33595fce210SBarry Smith 
3366497c311SBarry Smith - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos.
33759af0bd3SScott Kruger                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
33820c24465SJunchao Zhang                                   the only available is kokkos.
33920c24465SJunchao Zhang 
34095fce210SBarry Smith   Level: intermediate
341cab54364SBarry Smith 
342cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
34395fce210SBarry Smith @*/
344d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345d71ae5a4SJacob Faibussowitsch {
34695fce210SBarry Smith   PetscSFType deft;
34795fce210SBarry Smith   char        type[256];
34895fce210SBarry Smith   PetscBool   flg;
34995fce210SBarry Smith 
35095fce210SBarry Smith   PetscFunctionBegin;
35195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
352d0609cedSBarry Smith   PetscObjectOptionsBegin((PetscObject)sf);
35395fce210SBarry Smith   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
3549566063dSJacob Faibussowitsch   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
3559566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, flg ? type : deft));
3569566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357f9334340SJunchao Zhang   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
3587fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
35920c24465SJunchao Zhang   {
36020c24465SJunchao Zhang     char      backendstr[32] = {0};
36159af0bd3SScott Kruger     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
36220c24465SJunchao Zhang     /* Change the defaults set in PetscSFCreate() with command line options */
363d5b43468SJose E. Roman     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
3649566063dSJacob Faibussowitsch     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
3659566063dSJacob Faibussowitsch     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
3669566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
3679566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
3689566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
36959af0bd3SScott Kruger   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
37020c24465SJunchao Zhang     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
37120c24465SJunchao Zhang     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
37259af0bd3SScott Kruger     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
37328b400f6SJacob Faibussowitsch     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
37420c24465SJunchao Zhang   #elif defined(PETSC_HAVE_KOKKOS)
37508401ef6SPierre Jolivet     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
37620c24465SJunchao Zhang   #endif
377715b587bSJunchao Zhang 
378715b587bSJunchao Zhang   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379715b587bSJunchao Zhang     if (sf->use_stream_aware_mpi) {
380715b587bSJunchao Zhang       MPI_Info info;
381715b587bSJunchao Zhang 
382715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_create(&info));
383715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384715b587bSJunchao Zhang       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_free(&info));
387715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388715b587bSJunchao Zhang     }
389715b587bSJunchao Zhang   #endif
39020c24465SJunchao Zhang   }
391c2a741eeSJunchao Zhang #endif
392dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393d0609cedSBarry Smith   PetscOptionsEnd();
3943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39595fce210SBarry Smith }
39695fce210SBarry Smith 
39729046d53SLisandro Dalcin /*@
39895fce210SBarry Smith   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
39995fce210SBarry Smith 
40095fce210SBarry Smith   Logically Collective
40195fce210SBarry Smith 
4024165533cSJose E. Roman   Input Parameters:
40395fce210SBarry Smith + sf  - star forest
404cab54364SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
40595fce210SBarry Smith 
40695fce210SBarry Smith   Level: advanced
40795fce210SBarry Smith 
40820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
40995fce210SBarry Smith @*/
410d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411d71ae5a4SJacob Faibussowitsch {
41295fce210SBarry Smith   PetscFunctionBegin;
41395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
41495fce210SBarry Smith   PetscValidLogicalCollectiveBool(sf, flg, 2);
41528b400f6SJacob Faibussowitsch   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
41695fce210SBarry Smith   sf->rankorder = flg;
4173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41895fce210SBarry Smith }
41995fce210SBarry Smith 
4205d83a8b1SBarry Smith /*@
42195fce210SBarry Smith   PetscSFSetGraph - Set a parallel star forest
42295fce210SBarry Smith 
42395fce210SBarry Smith   Collective
42495fce210SBarry Smith 
4254165533cSJose E. Roman   Input Parameters:
42695fce210SBarry Smith + sf         - star forest
42795fce210SBarry Smith . nroots     - number of root vertices on the current process (these are possible targets for other process to attach leaves)
42895fce210SBarry Smith . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
42920662ed9SBarry Smith . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
430c4e6a40aSLawrence Mitchell                during setup in debug mode)
43120662ed9SBarry Smith . localmode  - copy mode for `ilocal`
43226a11704SBarry Smith . iremote    - remote locations of root vertices for each leaf on the current process, length is 2 `nleaves'
43326a11704SBarry Smith                (locations must be >= 0, enforced during setup in debug mode)
43420662ed9SBarry Smith - remotemode - copy mode for `iremote`
43595fce210SBarry Smith 
43695fce210SBarry Smith   Level: intermediate
43795fce210SBarry Smith 
43895452b02SPatrick Sanan   Notes:
43920662ed9SBarry Smith   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
44038ab3f8aSBarry Smith 
44120662ed9SBarry Smith   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
44220662ed9SBarry Smith   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
443db2b9530SVaclav Hapla   PETSc might modify the respective array;
44420662ed9SBarry Smith   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
445cab54364SBarry Smith   Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
446db2b9530SVaclav Hapla 
44738b5cf2dSJacob Faibussowitsch   Fortran Notes:
44820662ed9SBarry Smith   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449c4e6a40aSLawrence Mitchell 
45038b5cf2dSJacob Faibussowitsch   Developer Notes:
451db2b9530SVaclav Hapla   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
45220662ed9SBarry Smith   This also allows to compare leaf sets of two `PetscSF`s easily.
45372bf8598SVaclav Hapla 
45420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
45595fce210SBarry Smith @*/
4569c9354e5SBarry Smith PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
457d71ae5a4SJacob Faibussowitsch {
458db2b9530SVaclav Hapla   PetscBool unique, contiguous;
45995fce210SBarry Smith 
46095fce210SBarry Smith   PetscFunctionBegin;
46195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
4624f572ea9SToby Isaac   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
4634f572ea9SToby Isaac   if (nleaves > 0) PetscAssertPointer(iremote, 6);
46408401ef6SPierre Jolivet   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
46508401ef6SPierre Jolivet   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
4668da24d32SBarry Smith   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
4678da24d32SBarry Smith    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
4688da24d32SBarry Smith   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
4698da24d32SBarry Smith   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
47029046d53SLisandro Dalcin 
4712a67d2daSStefano Zampini   if (sf->nroots >= 0) { /* Reset only if graph already set */
4729566063dSJacob Faibussowitsch     PetscCall(PetscSFReset(sf));
4732a67d2daSStefano Zampini   }
4742a67d2daSStefano Zampini 
4759566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
4766497c311SBarry Smith   if (PetscDefined(USE_DEBUG)) {
4776497c311SBarry Smith     PetscMPIInt size;
4786497c311SBarry Smith 
4796497c311SBarry Smith     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480ac530a7eSPierre Jolivet     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
4816497c311SBarry Smith   }
48229046d53SLisandro Dalcin 
48395fce210SBarry Smith   sf->nroots  = nroots;
48495fce210SBarry Smith   sf->nleaves = nleaves;
48529046d53SLisandro Dalcin 
486db2b9530SVaclav Hapla   if (localmode == PETSC_COPY_VALUES && ilocal) {
487db2b9530SVaclav Hapla     PetscInt *tlocal = NULL;
488db2b9530SVaclav Hapla 
4899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tlocal));
4909566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491db2b9530SVaclav Hapla     ilocal = tlocal;
492db2b9530SVaclav Hapla   }
493db2b9530SVaclav Hapla   if (remotemode == PETSC_COPY_VALUES) {
494db2b9530SVaclav Hapla     PetscSFNode *tremote = NULL;
495db2b9530SVaclav Hapla 
4969566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tremote));
4979566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498db2b9530SVaclav Hapla     iremote = tremote;
499db2b9530SVaclav Hapla   }
500db2b9530SVaclav Hapla 
50129046d53SLisandro Dalcin   if (nleaves && ilocal) {
502db2b9530SVaclav Hapla     PetscSFNode work;
503db2b9530SVaclav Hapla 
5049566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
5059566063dSJacob Faibussowitsch     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506db2b9530SVaclav Hapla     unique = PetscNot(unique);
507db2b9530SVaclav Hapla     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508db2b9530SVaclav Hapla     sf->minleaf = ilocal[0];
509db2b9530SVaclav Hapla     sf->maxleaf = ilocal[nleaves - 1];
510db2b9530SVaclav Hapla     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
51129046d53SLisandro Dalcin   } else {
51229046d53SLisandro Dalcin     sf->minleaf = 0;
51329046d53SLisandro Dalcin     sf->maxleaf = nleaves - 1;
514db2b9530SVaclav Hapla     unique      = PETSC_TRUE;
515db2b9530SVaclav Hapla     contiguous  = PETSC_TRUE;
51629046d53SLisandro Dalcin   }
51729046d53SLisandro Dalcin 
518db2b9530SVaclav Hapla   if (contiguous) {
519db2b9530SVaclav Hapla     if (localmode == PETSC_USE_POINTER) {
520db2b9530SVaclav Hapla       ilocal = NULL;
521db2b9530SVaclav Hapla     } else {
5229566063dSJacob Faibussowitsch       PetscCall(PetscFree(ilocal));
523db2b9530SVaclav Hapla     }
524db2b9530SVaclav Hapla   }
525db2b9530SVaclav Hapla   sf->mine = ilocal;
526db2b9530SVaclav Hapla   if (localmode == PETSC_USE_POINTER) {
52729046d53SLisandro Dalcin     sf->mine_alloc = NULL;
528db2b9530SVaclav Hapla   } else {
529db2b9530SVaclav Hapla     sf->mine_alloc = ilocal;
53095fce210SBarry Smith   }
5316497c311SBarry Smith   if (PetscDefined(USE_DEBUG)) {
5326497c311SBarry Smith     PetscMPIInt size;
5336497c311SBarry Smith 
5346497c311SBarry Smith     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
535ac530a7eSPierre Jolivet     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
5366497c311SBarry Smith   }
537db2b9530SVaclav Hapla   sf->remote = iremote;
538db2b9530SVaclav Hapla   if (remotemode == PETSC_USE_POINTER) {
53929046d53SLisandro Dalcin     sf->remote_alloc = NULL;
540db2b9530SVaclav Hapla   } else {
541db2b9530SVaclav Hapla     sf->remote_alloc = iremote;
54295fce210SBarry Smith   }
5439566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
54429046d53SLisandro Dalcin   sf->graphset = PETSC_TRUE;
5453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
54695fce210SBarry Smith }
54795fce210SBarry Smith 
54829046d53SLisandro Dalcin /*@
549cab54364SBarry Smith   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
550dd5b3ca6SJunchao Zhang 
551dd5b3ca6SJunchao Zhang   Collective
552dd5b3ca6SJunchao Zhang 
553dd5b3ca6SJunchao Zhang   Input Parameters:
554cab54364SBarry Smith + sf      - The `PetscSF`
555cab54364SBarry Smith . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
557cab54364SBarry Smith 
558cab54364SBarry Smith   Level: intermediate
559dd5b3ca6SJunchao Zhang 
560dd5b3ca6SJunchao Zhang   Notes:
56120662ed9SBarry Smith   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
56220662ed9SBarry Smith   `n` and `N` are the local and global sizes of `x` respectively.
563dd5b3ca6SJunchao Zhang 
56420662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to
56520662ed9SBarry Smith   sequential vectors `y` on all MPI processes.
566dd5b3ca6SJunchao Zhang 
56720662ed9SBarry Smith   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a
56820662ed9SBarry Smith   sequential vector `y` on rank 0.
569dd5b3ca6SJunchao Zhang 
57020662ed9SBarry Smith   In above cases, entries of `x` are roots and entries of `y` are leaves.
571dd5b3ca6SJunchao Zhang 
57220662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine
573dd5b3ca6SJunchao Zhang   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
574cab54364SBarry Smith   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
575dd5b3ca6SJunchao Zhang   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
576cab54364SBarry Smith   items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.
577dd5b3ca6SJunchao Zhang 
578dd5b3ca6SJunchao Zhang   In this case, roots and leaves are symmetric.
579dd5b3ca6SJunchao Zhang 
580cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
581dd5b3ca6SJunchao Zhang  @*/
582d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
583d71ae5a4SJacob Faibussowitsch {
584dd5b3ca6SJunchao Zhang   MPI_Comm    comm;
585dd5b3ca6SJunchao Zhang   PetscInt    n, N, res[2];
586dd5b3ca6SJunchao Zhang   PetscMPIInt rank, size;
587dd5b3ca6SJunchao Zhang   PetscSFType type;
588dd5b3ca6SJunchao Zhang 
589dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
5902abc8c78SJacob Faibussowitsch   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
5914f572ea9SToby Isaac   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
5929566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
5932c71b3e2SJacob Faibussowitsch   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
5949566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
596dd5b3ca6SJunchao Zhang 
597dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
598835f2295SStefano Zampini     PetscInt sizei = size;
599835f2295SStefano Zampini 
600dd5b3ca6SJunchao Zhang     type = PETSCSFALLTOALL;
6019566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreate(comm, &sf->map));
6029566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
603835f2295SStefano Zampini     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
6049566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(sf->map));
605dd5b3ca6SJunchao Zhang   } else {
6069566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetLocalSize(map, &n));
6079566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetSize(map, &N));
608dd5b3ca6SJunchao Zhang     res[0] = n;
609dd5b3ca6SJunchao Zhang     res[1] = -n;
610dd5b3ca6SJunchao Zhang     /* Check if n are same over all ranks so that we can optimize it */
611462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
612dd5b3ca6SJunchao Zhang     if (res[0] == -res[1]) { /* same n */
613dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
614dd5b3ca6SJunchao Zhang     } else {
615dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
616dd5b3ca6SJunchao Zhang     }
6179566063dSJacob Faibussowitsch     PetscCall(PetscLayoutReference(map, &sf->map));
618dd5b3ca6SJunchao Zhang   }
6199566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, type));
620dd5b3ca6SJunchao Zhang 
621dd5b3ca6SJunchao Zhang   sf->pattern = pattern;
622dd5b3ca6SJunchao Zhang   sf->mine    = NULL; /* Contiguous */
623dd5b3ca6SJunchao Zhang 
624dd5b3ca6SJunchao Zhang   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
625dd5b3ca6SJunchao Zhang      Also set other easy stuff.
626dd5b3ca6SJunchao Zhang    */
627dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
628dd5b3ca6SJunchao Zhang     sf->nleaves = N;
629dd5b3ca6SJunchao Zhang     sf->nroots  = n;
630dd5b3ca6SJunchao Zhang     sf->nranks  = size;
631dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
632dd5b3ca6SJunchao Zhang     sf->maxleaf = N - 1;
633dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_GATHER) {
634dd5b3ca6SJunchao Zhang     sf->nleaves = rank ? 0 : N;
635dd5b3ca6SJunchao Zhang     sf->nroots  = n;
636dd5b3ca6SJunchao Zhang     sf->nranks  = rank ? 0 : size;
637dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
638dd5b3ca6SJunchao Zhang     sf->maxleaf = rank ? -1 : N - 1;
639dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
640dd5b3ca6SJunchao Zhang     sf->nleaves = size;
641dd5b3ca6SJunchao Zhang     sf->nroots  = size;
642dd5b3ca6SJunchao Zhang     sf->nranks  = size;
643dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
644dd5b3ca6SJunchao Zhang     sf->maxleaf = size - 1;
645dd5b3ca6SJunchao Zhang   }
646dd5b3ca6SJunchao Zhang   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
647dd5b3ca6SJunchao Zhang   sf->graphset = PETSC_TRUE;
6483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
649dd5b3ca6SJunchao Zhang }
650dd5b3ca6SJunchao Zhang 
651dd5b3ca6SJunchao Zhang /*@
652cab54364SBarry Smith   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
65395fce210SBarry Smith 
65495fce210SBarry Smith   Collective
65595fce210SBarry Smith 
6564165533cSJose E. Roman   Input Parameter:
65795fce210SBarry Smith . sf - star forest to invert
65895fce210SBarry Smith 
6594165533cSJose E. Roman   Output Parameter:
66020662ed9SBarry Smith . isf - inverse of `sf`
6614165533cSJose E. Roman 
66295fce210SBarry Smith   Level: advanced
66395fce210SBarry Smith 
66495fce210SBarry Smith   Notes:
66595fce210SBarry Smith   All roots must have degree 1.
66695fce210SBarry Smith 
66795fce210SBarry Smith   The local space may be a permutation, but cannot be sparse.
66895fce210SBarry Smith 
66920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
67095fce210SBarry Smith @*/
671d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
672d71ae5a4SJacob Faibussowitsch {
67395fce210SBarry Smith   PetscMPIInt     rank;
67495fce210SBarry Smith   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
67595fce210SBarry Smith   const PetscInt *ilocal;
67695fce210SBarry Smith   PetscSFNode    *roots, *leaves;
67795fce210SBarry Smith 
67895fce210SBarry Smith   PetscFunctionBegin;
67929046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
68029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
6814f572ea9SToby Isaac   PetscAssertPointer(isf, 2);
68229046d53SLisandro Dalcin 
6839566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
68429046d53SLisandro Dalcin   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
68529046d53SLisandro Dalcin 
6869566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
6879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
688ae9aee6dSMatthew G. Knepley   for (i = 0; i < maxlocal; i++) {
68995fce210SBarry Smith     leaves[i].rank  = rank;
69095fce210SBarry Smith     leaves[i].index = i;
69195fce210SBarry Smith   }
69295fce210SBarry Smith   for (i = 0; i < nroots; i++) {
69395fce210SBarry Smith     roots[i].rank  = -1;
69495fce210SBarry Smith     roots[i].index = -1;
69595fce210SBarry Smith   }
6966497c311SBarry Smith   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
6976497c311SBarry Smith   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
69895fce210SBarry Smith 
69995fce210SBarry Smith   /* Check whether our leaves are sparse */
7009371c9d4SSatish Balay   for (i = 0, count = 0; i < nroots; i++)
7019371c9d4SSatish Balay     if (roots[i].rank >= 0) count++;
70295fce210SBarry Smith   if (count == nroots) newilocal = NULL;
7039371c9d4SSatish Balay   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
70495fce210SBarry Smith     for (i = 0, count = 0; i < nroots; i++) {
70595fce210SBarry Smith       if (roots[i].rank >= 0) {
70695fce210SBarry Smith         newilocal[count]   = i;
70795fce210SBarry Smith         roots[count].rank  = roots[i].rank;
70895fce210SBarry Smith         roots[count].index = roots[i].index;
70995fce210SBarry Smith         count++;
71095fce210SBarry Smith       }
71195fce210SBarry Smith     }
71295fce210SBarry Smith   }
71395fce210SBarry Smith 
7149566063dSJacob Faibussowitsch   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
7159566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
7169566063dSJacob Faibussowitsch   PetscCall(PetscFree2(roots, leaves));
7173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
71895fce210SBarry Smith }
71995fce210SBarry Smith 
72095fce210SBarry Smith /*@
721cab54364SBarry Smith   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
72295fce210SBarry Smith 
72395fce210SBarry Smith   Collective
72495fce210SBarry Smith 
7254165533cSJose E. Roman   Input Parameters:
72695fce210SBarry Smith + sf  - communication object to duplicate
727cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
72895fce210SBarry Smith 
7294165533cSJose E. Roman   Output Parameter:
73095fce210SBarry Smith . newsf - new communication object
73195fce210SBarry Smith 
73295fce210SBarry Smith   Level: beginner
73395fce210SBarry Smith 
73420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
73595fce210SBarry Smith @*/
736d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
737d71ae5a4SJacob Faibussowitsch {
73829046d53SLisandro Dalcin   PetscSFType  type;
73997929ea7SJunchao Zhang   MPI_Datatype dtype = MPIU_SCALAR;
74095fce210SBarry Smith 
74195fce210SBarry Smith   PetscFunctionBegin;
74229046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
74329046d53SLisandro Dalcin   PetscValidLogicalCollectiveEnum(sf, opt, 2);
7444f572ea9SToby Isaac   PetscAssertPointer(newsf, 3);
7459566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
7469566063dSJacob Faibussowitsch   PetscCall(PetscSFGetType(sf, &type));
7479566063dSJacob Faibussowitsch   if (type) PetscCall(PetscSFSetType(*newsf, type));
74835cb6cd3SPierre Jolivet   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
74995fce210SBarry Smith   if (opt == PETSCSF_DUPLICATE_GRAPH) {
750dd5b3ca6SJunchao Zhang     PetscSFCheckGraphSet(sf, 1);
751dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
75295fce210SBarry Smith       PetscInt           nroots, nleaves;
75395fce210SBarry Smith       const PetscInt    *ilocal;
75495fce210SBarry Smith       const PetscSFNode *iremote;
7559566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
7569566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
757dd5b3ca6SJunchao Zhang     } else {
7589566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
759dd5b3ca6SJunchao Zhang     }
76095fce210SBarry Smith   }
76197929ea7SJunchao Zhang   /* Since oldtype is committed, so is newtype, according to MPI */
7629566063dSJacob Faibussowitsch   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
76397929ea7SJunchao Zhang   (*newsf)->vscat.bs     = sf->vscat.bs;
76497929ea7SJunchao Zhang   (*newsf)->vscat.unit   = dtype;
76597929ea7SJunchao Zhang   (*newsf)->vscat.to_n   = sf->vscat.to_n;
76697929ea7SJunchao Zhang   (*newsf)->vscat.from_n = sf->vscat.from_n;
76797929ea7SJunchao Zhang   /* Do not copy lsf. Build it on demand since it is rarely used */
76897929ea7SJunchao Zhang 
76920c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
77020c24465SJunchao Zhang   (*newsf)->backend              = sf->backend;
77171438e86SJunchao Zhang   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
77220c24465SJunchao Zhang   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
77320c24465SJunchao Zhang   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
77420c24465SJunchao Zhang #endif
775dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
77620c24465SJunchao Zhang   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
7773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
77895fce210SBarry Smith }
77995fce210SBarry Smith 
78095fce210SBarry Smith /*@C
78195fce210SBarry Smith   PetscSFGetGraph - Get the graph specifying a parallel star forest
78295fce210SBarry Smith 
78395fce210SBarry Smith   Not Collective
78495fce210SBarry Smith 
7854165533cSJose E. Roman   Input Parameter:
78695fce210SBarry Smith . sf - star forest
78795fce210SBarry Smith 
7884165533cSJose E. Roman   Output Parameters:
78995fce210SBarry Smith + nroots  - number of root vertices on the current process (these are possible targets for other process to attach leaves)
79095fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process
79120662ed9SBarry Smith . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
79295fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process
79395fce210SBarry Smith 
794cab54364SBarry Smith   Level: intermediate
795cab54364SBarry Smith 
796373e0d91SLisandro Dalcin   Notes:
79720662ed9SBarry Smith   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
798373e0d91SLisandro Dalcin 
79920662ed9SBarry Smith   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
800db2b9530SVaclav Hapla 
801ce78bad3SBarry Smith   Fortran Note:
802ce78bad3SBarry Smith   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
803ca797d7aSLawrence Mitchell 
80420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
80595fce210SBarry Smith @*/
806ce78bad3SBarry Smith PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
807d71ae5a4SJacob Faibussowitsch {
80895fce210SBarry Smith   PetscFunctionBegin;
80995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
810b8dee149SJunchao Zhang   if (sf->ops->GetGraph) {
811f4f49eeaSPierre Jolivet     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
812b8dee149SJunchao Zhang   } else {
81395fce210SBarry Smith     if (nroots) *nroots = sf->nroots;
81495fce210SBarry Smith     if (nleaves) *nleaves = sf->nleaves;
81595fce210SBarry Smith     if (ilocal) *ilocal = sf->mine;
81695fce210SBarry Smith     if (iremote) *iremote = sf->remote;
817b8dee149SJunchao Zhang   }
8183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81995fce210SBarry Smith }
82095fce210SBarry Smith 
82129046d53SLisandro Dalcin /*@
82295fce210SBarry Smith   PetscSFGetLeafRange - Get the active leaf ranges
82395fce210SBarry Smith 
82495fce210SBarry Smith   Not Collective
82595fce210SBarry Smith 
8264165533cSJose E. Roman   Input Parameter:
82795fce210SBarry Smith . sf - star forest
82895fce210SBarry Smith 
8294165533cSJose E. Roman   Output Parameters:
83020662ed9SBarry Smith + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
83120662ed9SBarry Smith - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
83295fce210SBarry Smith 
83395fce210SBarry Smith   Level: developer
83495fce210SBarry Smith 
83520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
83695fce210SBarry Smith @*/
837d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
838d71ae5a4SJacob Faibussowitsch {
83995fce210SBarry Smith   PetscFunctionBegin;
84095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
84129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
84295fce210SBarry Smith   if (minleaf) *minleaf = sf->minleaf;
84395fce210SBarry Smith   if (maxleaf) *maxleaf = sf->maxleaf;
8443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
84595fce210SBarry Smith }
84695fce210SBarry Smith 
847ffeef943SBarry Smith /*@
848cab54364SBarry Smith   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
849fe2efc57SMark 
85020f4b53cSBarry Smith   Collective
851fe2efc57SMark 
852fe2efc57SMark   Input Parameters:
853fe2efc57SMark + A    - the star forest
854cab54364SBarry Smith . obj  - Optional object that provides the prefix for the option names
855736c3998SJose E. Roman - name - command line option
856fe2efc57SMark 
857fe2efc57SMark   Level: intermediate
858cab54364SBarry Smith 
85920662ed9SBarry Smith   Note:
86020662ed9SBarry Smith   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
86120662ed9SBarry Smith 
862db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
863fe2efc57SMark @*/
864d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
865d71ae5a4SJacob Faibussowitsch {
866fe2efc57SMark   PetscFunctionBegin;
867fe2efc57SMark   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
8689566063dSJacob Faibussowitsch   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
8693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
870fe2efc57SMark }
871fe2efc57SMark 
872ffeef943SBarry Smith /*@
87395fce210SBarry Smith   PetscSFView - view a star forest
87495fce210SBarry Smith 
87595fce210SBarry Smith   Collective
87695fce210SBarry Smith 
8774165533cSJose E. Roman   Input Parameters:
87895fce210SBarry Smith + sf     - star forest
879cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
88095fce210SBarry Smith 
88195fce210SBarry Smith   Level: beginner
88295fce210SBarry Smith 
883cab54364SBarry Smith .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
88495fce210SBarry Smith @*/
885d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
886d71ae5a4SJacob Faibussowitsch {
8879f196a02SMartin Diehl   PetscBool         isascii;
88895fce210SBarry Smith   PetscViewerFormat format;
88995fce210SBarry Smith 
89095fce210SBarry Smith   PetscFunctionBegin;
89195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
8929566063dSJacob Faibussowitsch   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
89395fce210SBarry Smith   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
89495fce210SBarry Smith   PetscCheckSameComm(sf, 1, viewer, 2);
8959566063dSJacob Faibussowitsch   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
8969f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
8979f196a02SMartin Diehl   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
89895fce210SBarry Smith     PetscMPIInt rank;
8996497c311SBarry Smith     PetscInt    j;
90095fce210SBarry Smith 
9019566063dSJacob Faibussowitsch     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
9029566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPushTab(viewer));
903dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
90480153354SVaclav Hapla       if (!sf->graphset) {
9059566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
9069566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopTab(viewer));
9073ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
90880153354SVaclav Hapla       }
9099566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
9109566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9116497c311SBarry Smith       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
912835f2295SStefano Zampini       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
9139566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9149566063dSJacob Faibussowitsch       PetscCall(PetscViewerGetFormat(viewer, &format));
91595fce210SBarry Smith       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
91681bfa7aaSJed Brown         PetscMPIInt *tmpranks, *perm;
9176497c311SBarry Smith 
9189566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
9199566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
9206497c311SBarry Smith         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
9219566063dSJacob Faibussowitsch         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
9229566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
9236497c311SBarry Smith         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
9246497c311SBarry Smith           PetscMPIInt i = perm[ii];
9256497c311SBarry Smith 
9269566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
92748a46eb9SPierre Jolivet           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
92895fce210SBarry Smith         }
9299566063dSJacob Faibussowitsch         PetscCall(PetscFree2(tmpranks, perm));
93095fce210SBarry Smith       }
9319566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9329566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
933dd5b3ca6SJunchao Zhang     }
9349566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPopTab(viewer));
93595fce210SBarry Smith   }
936dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, View, viewer);
9373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
93895fce210SBarry Smith }
93995fce210SBarry Smith 
94095fce210SBarry Smith /*@C
941dec1416fSJunchao Zhang   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
94295fce210SBarry Smith 
94395fce210SBarry Smith   Not Collective
94495fce210SBarry Smith 
9454165533cSJose E. Roman   Input Parameter:
94695fce210SBarry Smith . sf - star forest
94795fce210SBarry Smith 
9484165533cSJose E. Roman   Output Parameters:
94995fce210SBarry Smith + nranks  - number of ranks referenced by local part
95020662ed9SBarry Smith . ranks   - [`nranks`] array of ranks
95120662ed9SBarry Smith . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
9526497c311SBarry Smith . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
9536497c311SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
95495fce210SBarry Smith 
95595fce210SBarry Smith   Level: developer
95695fce210SBarry Smith 
957cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
95895fce210SBarry Smith @*/
9599c9354e5SBarry Smith PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
960d71ae5a4SJacob Faibussowitsch {
96195fce210SBarry Smith   PetscFunctionBegin;
96295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
96328b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
964dec1416fSJunchao Zhang   if (sf->ops->GetRootRanks) {
9659927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
966dec1416fSJunchao Zhang   } else {
967dec1416fSJunchao Zhang     /* The generic implementation */
96895fce210SBarry Smith     if (nranks) *nranks = sf->nranks;
96995fce210SBarry Smith     if (ranks) *ranks = sf->ranks;
97095fce210SBarry Smith     if (roffset) *roffset = sf->roffset;
97195fce210SBarry Smith     if (rmine) *rmine = sf->rmine;
97295fce210SBarry Smith     if (rremote) *rremote = sf->rremote;
973dec1416fSJunchao Zhang   }
9743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
97595fce210SBarry Smith }
97695fce210SBarry Smith 
9778750ddebSJunchao Zhang /*@C
9788750ddebSJunchao Zhang   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
9798750ddebSJunchao Zhang 
9808750ddebSJunchao Zhang   Not Collective
9818750ddebSJunchao Zhang 
9824165533cSJose E. Roman   Input Parameter:
9838750ddebSJunchao Zhang . sf - star forest
9848750ddebSJunchao Zhang 
9854165533cSJose E. Roman   Output Parameters:
9868750ddebSJunchao Zhang + niranks  - number of leaf ranks referencing roots on this process
98720662ed9SBarry Smith . iranks   - [`niranks`] array of ranks
98820662ed9SBarry Smith . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
98920662ed9SBarry Smith - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
9908750ddebSJunchao Zhang 
9918750ddebSJunchao Zhang   Level: developer
9928750ddebSJunchao Zhang 
993cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
9948750ddebSJunchao Zhang @*/
9959c9354e5SBarry Smith PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
996d71ae5a4SJacob Faibussowitsch {
9978750ddebSJunchao Zhang   PetscFunctionBegin;
9988750ddebSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
99928b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
10008750ddebSJunchao Zhang   if (sf->ops->GetLeafRanks) {
10019927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
10028750ddebSJunchao Zhang   } else {
10038750ddebSJunchao Zhang     PetscSFType type;
10049566063dSJacob Faibussowitsch     PetscCall(PetscSFGetType(sf, &type));
100598921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
10068750ddebSJunchao Zhang   }
10073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10088750ddebSJunchao Zhang }
10098750ddebSJunchao Zhang 
1010d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1011d71ae5a4SJacob Faibussowitsch {
1012b5a8e515SJed Brown   PetscInt i;
1013b5a8e515SJed Brown   for (i = 0; i < n; i++) {
1014b5a8e515SJed Brown     if (needle == list[i]) return PETSC_TRUE;
1015b5a8e515SJed Brown   }
1016b5a8e515SJed Brown   return PETSC_FALSE;
1017b5a8e515SJed Brown }
1018b5a8e515SJed Brown 
101995fce210SBarry Smith /*@C
1020cab54364SBarry Smith   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
102121c688dcSJed Brown 
102221c688dcSJed Brown   Collective
102321c688dcSJed Brown 
10244165533cSJose E. Roman   Input Parameters:
1025cab54364SBarry Smith + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1026cab54364SBarry Smith - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
102721c688dcSJed Brown 
102821c688dcSJed Brown   Level: developer
102921c688dcSJed Brown 
1030cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
103121c688dcSJed Brown @*/
1032d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1033d71ae5a4SJacob Faibussowitsch {
1034eec179cfSJacob Faibussowitsch   PetscHMapI    table;
1035eec179cfSJacob Faibussowitsch   PetscHashIter pos;
10366497c311SBarry Smith   PetscMPIInt   size, groupsize, *groupranks, *ranks;
10376497c311SBarry Smith   PetscInt     *rcount;
10386497c311SBarry Smith   PetscInt      irank, sfnrank, ranksi;
10396497c311SBarry Smith   PetscMPIInt   i, orank = -1;
104021c688dcSJed Brown 
104121c688dcSJed Brown   PetscFunctionBegin;
104221c688dcSJed Brown   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
104329046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
10449566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1045eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(10, &table));
104621c688dcSJed Brown   for (i = 0; i < sf->nleaves; i++) {
104721c688dcSJed Brown     /* Log 1-based rank */
1048eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
104921c688dcSJed Brown   }
10506497c311SBarry Smith   PetscCall(PetscHMapIGetSize(table, &sfnrank));
10516497c311SBarry Smith   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
10529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
10539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1054eec179cfSJacob Faibussowitsch   PetscHashIterBegin(table, pos);
105521c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
10566497c311SBarry Smith     PetscHashIterGetKey(table, pos, ranksi);
10576497c311SBarry Smith     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1058eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(table, pos, rcount[i]);
1059eec179cfSJacob Faibussowitsch     PetscHashIterNext(table, pos);
106021c688dcSJed Brown     ranks[i]--; /* Convert back to 0-based */
106121c688dcSJed Brown   }
1062eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&table));
1063b5a8e515SJed Brown 
1064b5a8e515SJed Brown   /* We expect that dgroup is reliably "small" while nranks could be large */
1065b5a8e515SJed Brown   {
10667fb8a5e4SKarl Rupp     MPI_Group    group = MPI_GROUP_NULL;
1067b5a8e515SJed Brown     PetscMPIInt *dgroupranks;
10686497c311SBarry Smith 
10699566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
10709566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
10719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
10729566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &groupranks));
1073b5a8e515SJed Brown     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
10749566063dSJacob Faibussowitsch     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
10759566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
10769566063dSJacob Faibussowitsch     PetscCall(PetscFree(dgroupranks));
1077b5a8e515SJed Brown   }
1078b5a8e515SJed Brown 
1079b5a8e515SJed Brown   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1080b5a8e515SJed Brown   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1081b5a8e515SJed Brown     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1082b5a8e515SJed Brown       if (InList(ranks[i], groupsize, groupranks)) break;
1083b5a8e515SJed Brown     }
1084b5a8e515SJed Brown     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1085b5a8e515SJed Brown       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1086b5a8e515SJed Brown     }
1087b5a8e515SJed Brown     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
10886497c311SBarry Smith       PetscMPIInt tmprank;
10896497c311SBarry Smith       PetscInt    tmpcount;
1090247e8311SStefano Zampini 
1091b5a8e515SJed Brown       tmprank             = ranks[i];
1092b5a8e515SJed Brown       tmpcount            = rcount[i];
1093b5a8e515SJed Brown       ranks[i]            = ranks[sf->ndranks];
1094b5a8e515SJed Brown       rcount[i]           = rcount[sf->ndranks];
1095b5a8e515SJed Brown       ranks[sf->ndranks]  = tmprank;
1096b5a8e515SJed Brown       rcount[sf->ndranks] = tmpcount;
1097b5a8e515SJed Brown       sf->ndranks++;
1098b5a8e515SJed Brown     }
1099b5a8e515SJed Brown   }
11009566063dSJacob Faibussowitsch   PetscCall(PetscFree(groupranks));
11016497c311SBarry Smith   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
11026497c311SBarry Smith   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
110321c688dcSJed Brown   sf->roffset[0] = 0;
110421c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
11059566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
110621c688dcSJed Brown     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
110721c688dcSJed Brown     rcount[i]          = 0;
110821c688dcSJed Brown   }
1109247e8311SStefano Zampini   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1110247e8311SStefano Zampini     /* short circuit */
1111247e8311SStefano Zampini     if (orank != sf->remote[i].rank) {
111221c688dcSJed Brown       /* Search for index of iremote[i].rank in sf->ranks */
1113835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1114835f2295SStefano Zampini       PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1115b5a8e515SJed Brown       if (irank < 0) {
1116835f2295SStefano Zampini         PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1117b5a8e515SJed Brown         if (irank >= 0) irank += sf->ndranks;
111821c688dcSJed Brown       }
1119247e8311SStefano Zampini     }
1120835f2295SStefano Zampini     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
112121c688dcSJed Brown     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
112221c688dcSJed Brown     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
112321c688dcSJed Brown     rcount[irank]++;
112421c688dcSJed Brown   }
11259566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rcount, ranks));
11263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
112721c688dcSJed Brown }
112821c688dcSJed Brown 
112921c688dcSJed Brown /*@C
113095fce210SBarry Smith   PetscSFGetGroups - gets incoming and outgoing process groups
113195fce210SBarry Smith 
113295fce210SBarry Smith   Collective
113395fce210SBarry Smith 
11344165533cSJose E. Roman   Input Parameter:
113595fce210SBarry Smith . sf - star forest
113695fce210SBarry Smith 
11374165533cSJose E. Roman   Output Parameters:
113895fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots)
113995fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference)
114095fce210SBarry Smith 
114195fce210SBarry Smith   Level: developer
114295fce210SBarry Smith 
1143cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
114495fce210SBarry Smith @*/
1145d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1146d71ae5a4SJacob Faibussowitsch {
11477fb8a5e4SKarl Rupp   MPI_Group group = MPI_GROUP_NULL;
114895fce210SBarry Smith 
114995fce210SBarry Smith   PetscFunctionBegin;
115008401ef6SPierre Jolivet   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
115195fce210SBarry Smith   if (sf->ingroup == MPI_GROUP_NULL) {
115295fce210SBarry Smith     PetscInt        i;
115395fce210SBarry Smith     const PetscInt *indegree;
11546497c311SBarry Smith     PetscMPIInt     rank, *outranks, *inranks, indegree0;
115595fce210SBarry Smith     PetscSFNode    *remote;
115695fce210SBarry Smith     PetscSF         bgcount;
115795fce210SBarry Smith 
115895fce210SBarry Smith     /* Compute the number of incoming ranks */
11599566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nranks, &remote));
116095fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) {
116195fce210SBarry Smith       remote[i].rank  = sf->ranks[i];
116295fce210SBarry Smith       remote[i].index = 0;
116395fce210SBarry Smith     }
11649566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
11659566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
11669566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
11679566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
116895fce210SBarry Smith     /* Enumerate the incoming ranks */
11699566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
11709566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
117195fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
11729566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
11739566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
11749566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11756497c311SBarry Smith     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
11766497c311SBarry Smith     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
11779566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
11789566063dSJacob Faibussowitsch     PetscCall(PetscFree2(inranks, outranks));
11799566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&bgcount));
118095fce210SBarry Smith   }
118195fce210SBarry Smith   *incoming = sf->ingroup;
118295fce210SBarry Smith 
118395fce210SBarry Smith   if (sf->outgroup == MPI_GROUP_NULL) {
11849566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11859566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
11869566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
118795fce210SBarry Smith   }
118895fce210SBarry Smith   *outgoing = sf->outgroup;
11893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
119095fce210SBarry Smith }
119195fce210SBarry Smith 
119229046d53SLisandro Dalcin /*@
11930dd791a8SStefano Zampini   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
11940dd791a8SStefano Zampini 
11950dd791a8SStefano Zampini   Collective
11960dd791a8SStefano Zampini 
11970dd791a8SStefano Zampini   Input Parameter:
11980dd791a8SStefano Zampini . sf - star forest
11990dd791a8SStefano Zampini 
12000dd791a8SStefano Zampini   Output Parameter:
12010dd791a8SStefano Zampini . rsf - the star forest with a single root per process to perform communications
12020dd791a8SStefano Zampini 
12030dd791a8SStefano Zampini   Level: developer
12040dd791a8SStefano Zampini 
12050dd791a8SStefano Zampini .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
12060dd791a8SStefano Zampini @*/
12070dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
12080dd791a8SStefano Zampini {
12090dd791a8SStefano Zampini   PetscFunctionBegin;
12100dd791a8SStefano Zampini   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12110dd791a8SStefano Zampini   PetscAssertPointer(rsf, 2);
12120dd791a8SStefano Zampini   if (!sf->rankssf) {
12130dd791a8SStefano Zampini     PetscSFNode       *rremotes;
12140dd791a8SStefano Zampini     const PetscMPIInt *ranks;
12156497c311SBarry Smith     PetscMPIInt        nranks;
12160dd791a8SStefano Zampini 
12170dd791a8SStefano Zampini     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
12180dd791a8SStefano Zampini     PetscCall(PetscMalloc1(nranks, &rremotes));
12190dd791a8SStefano Zampini     for (PetscInt i = 0; i < nranks; i++) {
12200dd791a8SStefano Zampini       rremotes[i].rank  = ranks[i];
12210dd791a8SStefano Zampini       rremotes[i].index = 0;
12220dd791a8SStefano Zampini     }
12230dd791a8SStefano Zampini     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
12240dd791a8SStefano Zampini     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
12250dd791a8SStefano Zampini   }
12260dd791a8SStefano Zampini   *rsf = sf->rankssf;
12270dd791a8SStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
12280dd791a8SStefano Zampini }
12290dd791a8SStefano Zampini 
12300dd791a8SStefano Zampini /*@
1231cab54364SBarry Smith   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
123295fce210SBarry Smith 
123395fce210SBarry Smith   Collective
123495fce210SBarry Smith 
12354165533cSJose E. Roman   Input Parameter:
123695fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex
123795fce210SBarry Smith 
12384165533cSJose E. Roman   Output Parameter:
123995fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1
124095fce210SBarry Smith 
124195fce210SBarry Smith   Level: developer
124295fce210SBarry Smith 
1243cab54364SBarry Smith   Note:
1244cab54364SBarry Smith   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
124595fce210SBarry Smith   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
124695fce210SBarry Smith   edge, it is a candidate for future optimization that might involve its removal.
124795fce210SBarry Smith 
1248cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
124995fce210SBarry Smith @*/
1250d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1251d71ae5a4SJacob Faibussowitsch {
125295fce210SBarry Smith   PetscFunctionBegin;
125395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12544f572ea9SToby Isaac   PetscAssertPointer(multi, 2);
125595fce210SBarry Smith   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
12569566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
125795fce210SBarry Smith     *multi           = sf->multi;
1258013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12593ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
126095fce210SBarry Smith   }
126195fce210SBarry Smith   if (!sf->multi) {
126295fce210SBarry Smith     const PetscInt *indegree;
12639837ea96SMatthew G. Knepley     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
126495fce210SBarry Smith     PetscSFNode    *remote;
126529046d53SLisandro Dalcin     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
12669566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
12679566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
12689566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
126995fce210SBarry Smith     inoffset[0] = 0;
127095fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
12719837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) outones[i] = 1;
12729566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
12739566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
127495fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
127576bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1276ad540459SPierre Jolivet       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
127776bd3646SJed Brown     }
12789566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nleaves, &remote));
127995fce210SBarry Smith     for (i = 0; i < sf->nleaves; i++) {
128095fce210SBarry Smith       remote[i].rank  = sf->remote[i].rank;
128138e7336fSToby Isaac       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
128295fce210SBarry Smith     }
12839566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1284013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12859566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
128695fce210SBarry Smith     if (sf->rankorder) { /* Sort the ranks */
128795fce210SBarry Smith       PetscMPIInt  rank;
128895fce210SBarry Smith       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
128995fce210SBarry Smith       PetscSFNode *newremote;
12909566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
129195fce210SBarry Smith       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
12929566063dSJacob Faibussowitsch       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
12939837ea96SMatthew G. Knepley       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
12949566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
12959566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
129695fce210SBarry Smith       /* Sort the incoming ranks at each vertex, build the inverse map */
129795fce210SBarry Smith       for (i = 0; i < sf->nroots; i++) {
129895fce210SBarry Smith         PetscInt j;
129995fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
13008e3a54c0SPierre Jolivet         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
130195fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
130295fce210SBarry Smith       }
13039566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13049566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13059566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
130695fce210SBarry Smith       for (i = 0; i < sf->nleaves; i++) {
130795fce210SBarry Smith         newremote[i].rank  = sf->remote[i].rank;
130801365b40SToby Isaac         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
130995fce210SBarry Smith       }
13109566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
13119566063dSJacob Faibussowitsch       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
131295fce210SBarry Smith     }
13139566063dSJacob Faibussowitsch     PetscCall(PetscFree3(inoffset, outones, outoffset));
131495fce210SBarry Smith   }
131595fce210SBarry Smith   *multi = sf->multi;
13163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
131795fce210SBarry Smith }
131895fce210SBarry Smith 
131995fce210SBarry Smith /*@C
132020662ed9SBarry Smith   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
132195fce210SBarry Smith 
132295fce210SBarry Smith   Collective
132395fce210SBarry Smith 
13244165533cSJose E. Roman   Input Parameters:
132595fce210SBarry Smith + sf        - original star forest
1326ba2a7774SJunchao Zhang . nselected - number of selected roots on this process
1327ba2a7774SJunchao Zhang - selected  - indices of the selected roots on this process
132895fce210SBarry Smith 
13294165533cSJose E. Roman   Output Parameter:
1330cd620004SJunchao Zhang . esf - new star forest
133195fce210SBarry Smith 
133295fce210SBarry Smith   Level: advanced
133395fce210SBarry Smith 
133495fce210SBarry Smith   Note:
1335cab54364SBarry Smith   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
133695fce210SBarry Smith   be done by calling PetscSFGetGraph().
133795fce210SBarry Smith 
1338cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
133995fce210SBarry Smith @*/
1340*cf84bf9eSBarry Smith PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
1341d71ae5a4SJacob Faibussowitsch {
1342cd620004SJunchao Zhang   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1343cd620004SJunchao Zhang   const PetscInt    *ilocal;
1344cd620004SJunchao Zhang   signed char       *rootdata, *leafdata, *leafmem;
1345ba2a7774SJunchao Zhang   const PetscSFNode *iremote;
1346f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1347f659e5c7SJunchao Zhang   MPI_Comm           comm;
134895fce210SBarry Smith 
134995fce210SBarry Smith   PetscFunctionBegin;
135095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
135129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
13524f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
13534f572ea9SToby Isaac   PetscAssertPointer(esf, 4);
13540511a646SMatthew G. Knepley 
13559566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
13569566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
13579566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
13589566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1359cd620004SJunchao Zhang 
136076bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1361cd620004SJunchao Zhang     PetscBool dups;
13629566063dSJacob Faibussowitsch     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
136328b400f6SJacob Faibussowitsch     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1364511e6246SStefano Zampini     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1365cd620004SJunchao Zhang   }
1366f659e5c7SJunchao Zhang 
1367dbbe0bcdSBarry Smith   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1368dbbe0bcdSBarry Smith   else {
1369cd620004SJunchao Zhang     /* A generic version of creating embedded sf */
13709566063dSJacob Faibussowitsch     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1371cd620004SJunchao Zhang     maxlocal = maxleaf - minleaf + 1;
13729566063dSJacob Faibussowitsch     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
13738e3a54c0SPierre Jolivet     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1374cd620004SJunchao Zhang     /* Tag selected roots and bcast to leaves */
1375cd620004SJunchao Zhang     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
13769566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
13779566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1378ba2a7774SJunchao Zhang 
1379cd620004SJunchao Zhang     /* Build esf with leaves that are still connected */
1380cd620004SJunchao Zhang     esf_nleaves = 0;
1381cd620004SJunchao Zhang     for (i = 0; i < nleaves; i++) {
1382cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1383cd620004SJunchao Zhang       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1384cd620004SJunchao Zhang          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1385cd620004SJunchao Zhang       */
1386cd620004SJunchao Zhang       esf_nleaves += (leafdata[j] ? 1 : 0);
1387cd620004SJunchao Zhang     }
13889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
13899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1390cd620004SJunchao Zhang     for (i = n = 0; i < nleaves; i++) {
1391cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1392cd620004SJunchao Zhang       if (leafdata[j]) {
1393cd620004SJunchao Zhang         new_ilocal[n]        = j;
1394cd620004SJunchao Zhang         new_iremote[n].rank  = iremote[i].rank;
1395cd620004SJunchao Zhang         new_iremote[n].index = iremote[i].index;
1396fc1ede2bSMatthew G. Knepley         ++n;
139795fce210SBarry Smith       }
139895fce210SBarry Smith     }
13999566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, esf));
14009566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(*esf));
14019566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
14029566063dSJacob Faibussowitsch     PetscCall(PetscFree2(rootdata, leafmem));
1403f659e5c7SJunchao Zhang   }
14049566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
14053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
140695fce210SBarry Smith }
140795fce210SBarry Smith 
14082f5fb4c2SMatthew G. Knepley /*@C
140920662ed9SBarry Smith   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
14102f5fb4c2SMatthew G. Knepley 
14112f5fb4c2SMatthew G. Knepley   Collective
14122f5fb4c2SMatthew G. Knepley 
14134165533cSJose E. Roman   Input Parameters:
14142f5fb4c2SMatthew G. Knepley + sf        - original star forest
1415f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process
1416f659e5c7SJunchao Zhang - selected  - indices of the selected leaves on this process
14172f5fb4c2SMatthew G. Knepley 
14184165533cSJose E. Roman   Output Parameter:
14192f5fb4c2SMatthew G. Knepley . newsf - new star forest
14202f5fb4c2SMatthew G. Knepley 
14212f5fb4c2SMatthew G. Knepley   Level: advanced
14222f5fb4c2SMatthew G. Knepley 
1423cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
14242f5fb4c2SMatthew G. Knepley @*/
1425*cf84bf9eSBarry Smith PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf)
1426d71ae5a4SJacob Faibussowitsch {
1427f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
1428f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1429f659e5c7SJunchao Zhang   const PetscInt    *ilocal;
1430f659e5c7SJunchao Zhang   PetscInt           i, nroots, *leaves, *new_ilocal;
1431f659e5c7SJunchao Zhang   MPI_Comm           comm;
14322f5fb4c2SMatthew G. Knepley 
14332f5fb4c2SMatthew G. Knepley   PetscFunctionBegin;
14342f5fb4c2SMatthew G. Knepley   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
143529046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
14364f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
14374f572ea9SToby Isaac   PetscAssertPointer(newsf, 4);
14382f5fb4c2SMatthew G. Knepley 
1439f659e5c7SJunchao Zhang   /* Uniq selected[] and put results in leaves[] */
14409566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
14419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &leaves));
14429566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(leaves, selected, nselected));
14439566063dSJacob Faibussowitsch   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
144408401ef6SPierre Jolivet   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1445f659e5c7SJunchao Zhang 
1446f659e5c7SJunchao Zhang   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1447dbbe0bcdSBarry Smith   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1448dbbe0bcdSBarry Smith   else {
14499566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
14509566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_ilocal));
14519566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_iremote));
1452f659e5c7SJunchao Zhang     for (i = 0; i < nselected; ++i) {
1453f659e5c7SJunchao Zhang       const PetscInt l     = leaves[i];
1454f659e5c7SJunchao Zhang       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1455f659e5c7SJunchao Zhang       new_iremote[i].rank  = iremote[l].rank;
1456f659e5c7SJunchao Zhang       new_iremote[i].index = iremote[l].index;
14572f5fb4c2SMatthew G. Knepley     }
14589566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
14599566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1460f659e5c7SJunchao Zhang   }
14619566063dSJacob Faibussowitsch   PetscCall(PetscFree(leaves));
14623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14632f5fb4c2SMatthew G. Knepley }
14642f5fb4c2SMatthew G. Knepley 
146595fce210SBarry Smith /*@C
1466cab54364SBarry Smith   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
14673482bfa8SJunchao Zhang 
1468c3339decSBarry Smith   Collective
14693482bfa8SJunchao Zhang 
14704165533cSJose E. Roman   Input Parameters:
14713482bfa8SJunchao Zhang + sf       - star forest on which to communicate
14723482bfa8SJunchao Zhang . unit     - data type associated with each node
14733482bfa8SJunchao Zhang . rootdata - buffer to broadcast
14743482bfa8SJunchao Zhang - op       - operation to use for reduction
14753482bfa8SJunchao Zhang 
14764165533cSJose E. Roman   Output Parameter:
14773482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
14783482bfa8SJunchao Zhang 
14793482bfa8SJunchao Zhang   Level: intermediate
14803482bfa8SJunchao Zhang 
148120662ed9SBarry Smith   Note:
148220662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1483da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
1484cab54364SBarry Smith   use `PetscSFBcastWithMemTypeBegin()` instead.
1485cab54364SBarry Smith 
1486cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
14873482bfa8SJunchao Zhang @*/
1488d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1489d71ae5a4SJacob Faibussowitsch {
1490eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
14913482bfa8SJunchao Zhang 
14923482bfa8SJunchao Zhang   PetscFunctionBegin;
14933482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14949566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
14959566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
14969566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
14979566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1498dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
14999566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15013482bfa8SJunchao Zhang }
15023482bfa8SJunchao Zhang 
15033482bfa8SJunchao Zhang /*@C
150420662ed9SBarry Smith   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
150520662ed9SBarry Smith   to `PetscSFBcastEnd()`
1506d0295fc0SJunchao Zhang 
1507c3339decSBarry Smith   Collective
1508d0295fc0SJunchao Zhang 
15094165533cSJose E. Roman   Input Parameters:
1510d0295fc0SJunchao Zhang + sf        - star forest on which to communicate
1511d0295fc0SJunchao Zhang . unit      - data type associated with each node
1512d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1513d0295fc0SJunchao Zhang . rootdata  - buffer to broadcast
1514d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1515d0295fc0SJunchao Zhang - op        - operation to use for reduction
1516d0295fc0SJunchao Zhang 
15174165533cSJose E. Roman   Output Parameter:
1518d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
1519d0295fc0SJunchao Zhang 
1520d0295fc0SJunchao Zhang   Level: intermediate
1521d0295fc0SJunchao Zhang 
1522cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1523d0295fc0SJunchao Zhang @*/
1524d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1525d71ae5a4SJacob Faibussowitsch {
1526d0295fc0SJunchao Zhang   PetscFunctionBegin;
1527d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15289566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15299566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1530dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15319566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1533d0295fc0SJunchao Zhang }
1534d0295fc0SJunchao Zhang 
1535d0295fc0SJunchao Zhang /*@C
153620662ed9SBarry Smith   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
15373482bfa8SJunchao Zhang 
15383482bfa8SJunchao Zhang   Collective
15393482bfa8SJunchao Zhang 
15404165533cSJose E. Roman   Input Parameters:
15413482bfa8SJunchao Zhang + sf       - star forest
15423482bfa8SJunchao Zhang . unit     - data type
15433482bfa8SJunchao Zhang . rootdata - buffer to broadcast
15443482bfa8SJunchao Zhang - op       - operation to use for reduction
15453482bfa8SJunchao Zhang 
15464165533cSJose E. Roman   Output Parameter:
15473482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
15483482bfa8SJunchao Zhang 
15493482bfa8SJunchao Zhang   Level: intermediate
15503482bfa8SJunchao Zhang 
1551cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
15523482bfa8SJunchao Zhang @*/
1553d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1554d71ae5a4SJacob Faibussowitsch {
15553482bfa8SJunchao Zhang   PetscFunctionBegin;
15563482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15579566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1558dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
15599566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
15603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15613482bfa8SJunchao Zhang }
15623482bfa8SJunchao Zhang 
15633482bfa8SJunchao Zhang /*@C
1564cab54364SBarry Smith   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
156595fce210SBarry Smith 
156695fce210SBarry Smith   Collective
156795fce210SBarry Smith 
15684165533cSJose E. Roman   Input Parameters:
156995fce210SBarry Smith + sf       - star forest
157095fce210SBarry Smith . unit     - data type
157195fce210SBarry Smith . leafdata - values to reduce
157295fce210SBarry Smith - op       - reduction operation
157395fce210SBarry Smith 
15744165533cSJose E. Roman   Output Parameter:
157595fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
157695fce210SBarry Smith 
157795fce210SBarry Smith   Level: intermediate
157895fce210SBarry Smith 
157920662ed9SBarry Smith   Note:
158020662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1581da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
1582cab54364SBarry Smith   use `PetscSFReduceWithMemTypeBegin()` instead.
1583d0295fc0SJunchao Zhang 
158420662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
158595fce210SBarry Smith @*/
1586d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1587d71ae5a4SJacob Faibussowitsch {
1588eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
158995fce210SBarry Smith 
159095fce210SBarry Smith   PetscFunctionBegin;
159195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15929566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15939566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15949566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
15959566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1596f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
15979566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
159995fce210SBarry Smith }
160095fce210SBarry Smith 
160195fce210SBarry Smith /*@C
1602cab54364SBarry Smith   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1603d0295fc0SJunchao Zhang 
1604d0295fc0SJunchao Zhang   Collective
1605d0295fc0SJunchao Zhang 
16064165533cSJose E. Roman   Input Parameters:
1607d0295fc0SJunchao Zhang + sf        - star forest
1608d0295fc0SJunchao Zhang . unit      - data type
1609d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1610d0295fc0SJunchao Zhang . leafdata  - values to reduce
1611d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1612d0295fc0SJunchao Zhang - op        - reduction operation
1613d0295fc0SJunchao Zhang 
16144165533cSJose E. Roman   Output Parameter:
1615d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root
1616d0295fc0SJunchao Zhang 
1617d0295fc0SJunchao Zhang   Level: intermediate
1618d0295fc0SJunchao Zhang 
161920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1620d0295fc0SJunchao Zhang @*/
1621d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1622d71ae5a4SJacob Faibussowitsch {
1623d0295fc0SJunchao Zhang   PetscFunctionBegin;
1624d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16259566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16269566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1627f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
16289566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1630d0295fc0SJunchao Zhang }
1631d0295fc0SJunchao Zhang 
1632d0295fc0SJunchao Zhang /*@C
163320662ed9SBarry Smith   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
163495fce210SBarry Smith 
163595fce210SBarry Smith   Collective
163695fce210SBarry Smith 
16374165533cSJose E. Roman   Input Parameters:
163895fce210SBarry Smith + sf       - star forest
163995fce210SBarry Smith . unit     - data type
164095fce210SBarry Smith . leafdata - values to reduce
164195fce210SBarry Smith - op       - reduction operation
164295fce210SBarry Smith 
16434165533cSJose E. Roman   Output Parameter:
164495fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
164595fce210SBarry Smith 
164695fce210SBarry Smith   Level: intermediate
164795fce210SBarry Smith 
164820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
164995fce210SBarry Smith @*/
1650d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1651d71ae5a4SJacob Faibussowitsch {
165295fce210SBarry Smith   PetscFunctionBegin;
165395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16549566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1655dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
16569566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
16573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
165895fce210SBarry Smith }
165995fce210SBarry Smith 
166095fce210SBarry Smith /*@C
1661cab54364SBarry Smith   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1662cab54364SBarry Smith   to be completed with `PetscSFFetchAndOpEnd()`
1663a1729e3fSJunchao Zhang 
1664a1729e3fSJunchao Zhang   Collective
1665a1729e3fSJunchao Zhang 
16664165533cSJose E. Roman   Input Parameters:
1667a1729e3fSJunchao Zhang + sf       - star forest
1668a1729e3fSJunchao Zhang . unit     - data type
1669a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1670a1729e3fSJunchao Zhang - op       - operation to use for reduction
1671a1729e3fSJunchao Zhang 
16724165533cSJose E. Roman   Output Parameters:
1673a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1674a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1675a1729e3fSJunchao Zhang 
1676a1729e3fSJunchao Zhang   Level: advanced
1677a1729e3fSJunchao Zhang 
1678a1729e3fSJunchao Zhang   Note:
1679a1729e3fSJunchao Zhang   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1680a1729e3fSJunchao Zhang   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1681a1729e3fSJunchao Zhang   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1682a1729e3fSJunchao Zhang   integers.
1683a1729e3fSJunchao Zhang 
1684cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1685a1729e3fSJunchao Zhang @*/
1686d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1687d71ae5a4SJacob Faibussowitsch {
1688eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1689a1729e3fSJunchao Zhang 
1690a1729e3fSJunchao Zhang   PetscFunctionBegin;
1691a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16929566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16939566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16949566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
16959566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
16969566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
169708401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1698dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
16999566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1701a1729e3fSJunchao Zhang }
1702a1729e3fSJunchao Zhang 
1703a1729e3fSJunchao Zhang /*@C
1704cab54364SBarry Smith   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1705cab54364SBarry Smith   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1706d3b3e55cSJunchao Zhang 
1707d3b3e55cSJunchao Zhang   Collective
1708d3b3e55cSJunchao Zhang 
1709d3b3e55cSJunchao Zhang   Input Parameters:
1710d3b3e55cSJunchao Zhang + sf              - star forest
1711d3b3e55cSJunchao Zhang . unit            - data type
1712d3b3e55cSJunchao Zhang . rootmtype       - memory type of rootdata
1713d3b3e55cSJunchao Zhang . leafmtype       - memory type of leafdata
1714d3b3e55cSJunchao Zhang . leafdata        - leaf values to use in reduction
1715d3b3e55cSJunchao Zhang . leafupdatemtype - memory type of leafupdate
1716d3b3e55cSJunchao Zhang - op              - operation to use for reduction
1717d3b3e55cSJunchao Zhang 
1718d3b3e55cSJunchao Zhang   Output Parameters:
1719d3b3e55cSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1720d3b3e55cSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1721d3b3e55cSJunchao Zhang 
1722d3b3e55cSJunchao Zhang   Level: advanced
1723d3b3e55cSJunchao Zhang 
1724cab54364SBarry Smith   Note:
1725cab54364SBarry Smith   See `PetscSFFetchAndOpBegin()` for more details.
1726d3b3e55cSJunchao Zhang 
172720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1728d3b3e55cSJunchao Zhang @*/
1729d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1730d71ae5a4SJacob Faibussowitsch {
1731d3b3e55cSJunchao Zhang   PetscFunctionBegin;
1732d3b3e55cSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17339566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
17349566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
173508401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1736dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17379566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1739d3b3e55cSJunchao Zhang }
1740d3b3e55cSJunchao Zhang 
1741d3b3e55cSJunchao Zhang /*@C
174220662ed9SBarry Smith   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
174320662ed9SBarry Smith   to fetch values from roots and update atomically by applying operation using my leaf value
1744a1729e3fSJunchao Zhang 
1745a1729e3fSJunchao Zhang   Collective
1746a1729e3fSJunchao Zhang 
17474165533cSJose E. Roman   Input Parameters:
1748a1729e3fSJunchao Zhang + sf       - star forest
1749a1729e3fSJunchao Zhang . unit     - data type
1750a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1751a1729e3fSJunchao Zhang - op       - operation to use for reduction
1752a1729e3fSJunchao Zhang 
17534165533cSJose E. Roman   Output Parameters:
1754a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1755a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1756a1729e3fSJunchao Zhang 
1757a1729e3fSJunchao Zhang   Level: advanced
1758a1729e3fSJunchao Zhang 
175920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1760a1729e3fSJunchao Zhang @*/
1761d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1762d71ae5a4SJacob Faibussowitsch {
1763a1729e3fSJunchao Zhang   PetscFunctionBegin;
1764a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17659566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1766dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
17679566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
17683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1769a1729e3fSJunchao Zhang }
1770a1729e3fSJunchao Zhang 
1771a1729e3fSJunchao Zhang /*@C
1772cab54364SBarry Smith   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
177395fce210SBarry Smith 
177495fce210SBarry Smith   Collective
177595fce210SBarry Smith 
17764165533cSJose E. Roman   Input Parameter:
177795fce210SBarry Smith . sf - star forest
177895fce210SBarry Smith 
17794165533cSJose E. Roman   Output Parameter:
178095fce210SBarry Smith . degree - degree of each root vertex
178195fce210SBarry Smith 
178295fce210SBarry Smith   Level: advanced
178395fce210SBarry Smith 
1784cab54364SBarry Smith   Note:
178520662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1786ffe67aa5SVáclav Hapla 
1787cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
178895fce210SBarry Smith @*/
17896497c311SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1790d71ae5a4SJacob Faibussowitsch {
179195fce210SBarry Smith   PetscFunctionBegin;
179295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
179395fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
17944f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
1795803bd9e8SMatthew G. Knepley   if (!sf->degreeknown) {
17965b0d146aSStefano Zampini     PetscInt i, nroots = sf->nroots, maxlocal;
179728b400f6SJacob Faibussowitsch     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
17985b0d146aSStefano Zampini     maxlocal = sf->maxleaf - sf->minleaf + 1;
17999566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nroots, &sf->degree));
18009566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
180129046d53SLisandro Dalcin     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
18029837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
18039566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
180495fce210SBarry Smith   }
180595fce210SBarry Smith   *degree = NULL;
18063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
180795fce210SBarry Smith }
180895fce210SBarry Smith 
180995fce210SBarry Smith /*@C
1810cab54364SBarry Smith   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
181195fce210SBarry Smith 
181295fce210SBarry Smith   Collective
181395fce210SBarry Smith 
18144165533cSJose E. Roman   Input Parameter:
181595fce210SBarry Smith . sf - star forest
181695fce210SBarry Smith 
18174165533cSJose E. Roman   Output Parameter:
181895fce210SBarry Smith . degree - degree of each root vertex
181995fce210SBarry Smith 
182095fce210SBarry Smith   Level: developer
182195fce210SBarry Smith 
1822cab54364SBarry Smith   Note:
182320662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1824ffe67aa5SVáclav Hapla 
1825cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
182695fce210SBarry Smith @*/
18279c9354e5SBarry Smith PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1828d71ae5a4SJacob Faibussowitsch {
182995fce210SBarry Smith   PetscFunctionBegin;
183095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
183195fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
18324f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
183395fce210SBarry Smith   if (!sf->degreeknown) {
183428b400f6SJacob Faibussowitsch     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
18359566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
18369566063dSJacob Faibussowitsch     PetscCall(PetscFree(sf->degreetmp));
183795fce210SBarry Smith     sf->degreeknown = PETSC_TRUE;
183895fce210SBarry Smith   }
183995fce210SBarry Smith   *degree = sf->degree;
18403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
184195fce210SBarry Smith }
184295fce210SBarry Smith 
1843673100f5SVaclav Hapla /*@C
184420662ed9SBarry Smith   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
184566dfcd1aSVaclav Hapla   Each multi-root is assigned index of the corresponding original root.
1846673100f5SVaclav Hapla 
1847673100f5SVaclav Hapla   Collective
1848673100f5SVaclav Hapla 
18494165533cSJose E. Roman   Input Parameters:
1850673100f5SVaclav Hapla + sf     - star forest
1851cab54364SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1852673100f5SVaclav Hapla 
18534165533cSJose E. Roman   Output Parameters:
185420662ed9SBarry Smith + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
185520662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1856673100f5SVaclav Hapla 
1857673100f5SVaclav Hapla   Level: developer
1858673100f5SVaclav Hapla 
1859cab54364SBarry Smith   Note:
186020662ed9SBarry Smith   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1861ffe67aa5SVáclav Hapla 
1862cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1863673100f5SVaclav Hapla @*/
1864d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1865d71ae5a4SJacob Faibussowitsch {
1866673100f5SVaclav Hapla   PetscSF  msf;
186763bfac88SBarry Smith   PetscInt k = 0, nroots, nmroots;
1868673100f5SVaclav Hapla 
1869673100f5SVaclav Hapla   PetscFunctionBegin;
1870673100f5SVaclav Hapla   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18719566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
18724f572ea9SToby Isaac   if (nroots) PetscAssertPointer(degree, 2);
18734f572ea9SToby Isaac   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
18744f572ea9SToby Isaac   PetscAssertPointer(multiRootsOrigNumbering, 4);
18759566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &msf));
18769566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
18779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
187863bfac88SBarry Smith   for (PetscInt i = 0; i < nroots; i++) {
1879673100f5SVaclav Hapla     if (!degree[i]) continue;
188063bfac88SBarry Smith     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1881673100f5SVaclav Hapla   }
188208401ef6SPierre Jolivet   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
188366dfcd1aSVaclav Hapla   if (nMultiRoots) *nMultiRoots = nmroots;
18843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1885673100f5SVaclav Hapla }
1886673100f5SVaclav Hapla 
188795fce210SBarry Smith /*@C
1888cab54364SBarry Smith   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
188995fce210SBarry Smith 
189095fce210SBarry Smith   Collective
189195fce210SBarry Smith 
18924165533cSJose E. Roman   Input Parameters:
189395fce210SBarry Smith + sf       - star forest
189495fce210SBarry Smith . unit     - data type
189595fce210SBarry Smith - leafdata - leaf data to gather to roots
189695fce210SBarry Smith 
18974165533cSJose E. Roman   Output Parameter:
189895fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
189995fce210SBarry Smith 
190095fce210SBarry Smith   Level: intermediate
190195fce210SBarry Smith 
1902cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
190395fce210SBarry Smith @*/
1904d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1905d71ae5a4SJacob Faibussowitsch {
1906a5526d50SJunchao Zhang   PetscSF multi = NULL;
190795fce210SBarry Smith 
190895fce210SBarry Smith   PetscFunctionBegin;
190995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19109566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19119566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19129566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191495fce210SBarry Smith }
191595fce210SBarry Smith 
191695fce210SBarry Smith /*@C
1917cab54364SBarry Smith   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
191895fce210SBarry Smith 
191995fce210SBarry Smith   Collective
192095fce210SBarry Smith 
19214165533cSJose E. Roman   Input Parameters:
192295fce210SBarry Smith + sf       - star forest
192395fce210SBarry Smith . unit     - data type
192495fce210SBarry Smith - leafdata - leaf data to gather to roots
192595fce210SBarry Smith 
19264165533cSJose E. Roman   Output Parameter:
192795fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
192895fce210SBarry Smith 
192995fce210SBarry Smith   Level: intermediate
193095fce210SBarry Smith 
1931cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
193295fce210SBarry Smith @*/
1933d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1934d71ae5a4SJacob Faibussowitsch {
1935a5526d50SJunchao Zhang   PetscSF multi = NULL;
193695fce210SBarry Smith 
193795fce210SBarry Smith   PetscFunctionBegin;
193895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19399566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19409566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
194295fce210SBarry Smith }
194395fce210SBarry Smith 
194495fce210SBarry Smith /*@C
1945cab54364SBarry Smith   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
194695fce210SBarry Smith 
194795fce210SBarry Smith   Collective
194895fce210SBarry Smith 
19494165533cSJose E. Roman   Input Parameters:
195095fce210SBarry Smith + sf            - star forest
195195fce210SBarry Smith . unit          - data type
195295fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
195395fce210SBarry Smith 
19544165533cSJose E. Roman   Output Parameter:
195595fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
195695fce210SBarry Smith 
195795fce210SBarry Smith   Level: intermediate
195895fce210SBarry Smith 
195920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
196095fce210SBarry Smith @*/
1961d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1962d71ae5a4SJacob Faibussowitsch {
1963a5526d50SJunchao Zhang   PetscSF multi = NULL;
196495fce210SBarry Smith 
196595fce210SBarry Smith   PetscFunctionBegin;
196695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19679566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19689566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19699566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
197195fce210SBarry Smith }
197295fce210SBarry Smith 
197395fce210SBarry Smith /*@C
1974cab54364SBarry Smith   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
197595fce210SBarry Smith 
197695fce210SBarry Smith   Collective
197795fce210SBarry Smith 
19784165533cSJose E. Roman   Input Parameters:
197995fce210SBarry Smith + sf            - star forest
198095fce210SBarry Smith . unit          - data type
198195fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
198295fce210SBarry Smith 
19834165533cSJose E. Roman   Output Parameter:
198495fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
198595fce210SBarry Smith 
198695fce210SBarry Smith   Level: intermediate
198795fce210SBarry Smith 
198820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
198995fce210SBarry Smith @*/
1990d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1991d71ae5a4SJacob Faibussowitsch {
1992a5526d50SJunchao Zhang   PetscSF multi = NULL;
199395fce210SBarry Smith 
199495fce210SBarry Smith   PetscFunctionBegin;
199595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19969566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19979566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
199995fce210SBarry Smith }
2000a7b3aa13SAta Mesgarnejad 
2001d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2002d71ae5a4SJacob Faibussowitsch {
2003a072220fSLawrence Mitchell   PetscInt        i, n, nleaves;
2004a072220fSLawrence Mitchell   const PetscInt *ilocal = NULL;
2005a072220fSLawrence Mitchell   PetscHSetI      seen;
2006a072220fSLawrence Mitchell 
2007a072220fSLawrence Mitchell   PetscFunctionBegin;
2008b458e8f1SJose E. Roman   if (PetscDefined(USE_DEBUG)) {
20099566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
20109566063dSJacob Faibussowitsch     PetscCall(PetscHSetICreate(&seen));
2011a072220fSLawrence Mitchell     for (i = 0; i < nleaves; i++) {
2012a072220fSLawrence Mitchell       const PetscInt leaf = ilocal ? ilocal[i] : i;
20139566063dSJacob Faibussowitsch       PetscCall(PetscHSetIAdd(seen, leaf));
2014a072220fSLawrence Mitchell     }
20159566063dSJacob Faibussowitsch     PetscCall(PetscHSetIGetSize(seen, &n));
201608401ef6SPierre Jolivet     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
20179566063dSJacob Faibussowitsch     PetscCall(PetscHSetIDestroy(&seen));
2018b458e8f1SJose E. Roman   }
20193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2020a072220fSLawrence Mitchell }
202154729392SStefano Zampini 
2022a7b3aa13SAta Mesgarnejad /*@
2023cab54364SBarry Smith   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2024a7b3aa13SAta Mesgarnejad 
2025a7b3aa13SAta Mesgarnejad   Input Parameters:
2026cab54364SBarry Smith + sfA - The first `PetscSF`
2027cab54364SBarry Smith - sfB - The second `PetscSF`
2028a7b3aa13SAta Mesgarnejad 
20292fe279fdSBarry Smith   Output Parameter:
2030cab54364SBarry Smith . sfBA - The composite `PetscSF`
2031a7b3aa13SAta Mesgarnejad 
2032a7b3aa13SAta Mesgarnejad   Level: developer
2033a7b3aa13SAta Mesgarnejad 
2034a072220fSLawrence Mitchell   Notes:
2035cab54364SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
203654729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots.
203754729392SStefano Zampini 
203820662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
203920662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
204020662ed9SBarry Smith   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
204120662ed9SBarry Smith   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2042a072220fSLawrence Mitchell 
2043db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2044a7b3aa13SAta Mesgarnejad @*/
2045d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2046d71ae5a4SJacob Faibussowitsch {
2047a7b3aa13SAta Mesgarnejad   const PetscSFNode *remotePointsA, *remotePointsB;
2048d41018fbSJunchao Zhang   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
204954729392SStefano Zampini   const PetscInt    *localPointsA, *localPointsB;
205054729392SStefano Zampini   PetscInt          *localPointsBA;
205154729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
205254729392SStefano Zampini   PetscBool          denseB;
2053a7b3aa13SAta Mesgarnejad 
2054a7b3aa13SAta Mesgarnejad   PetscFunctionBegin;
2055a7b3aa13SAta Mesgarnejad   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
205629046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfA, 1);
205729046d53SLisandro Dalcin   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
205829046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfB, 2);
205954729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
20604f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
20619566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
20629566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
206354729392SStefano Zampini 
20649566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
20659566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
206620662ed9SBarry Smith   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
206720662ed9SBarry Smith      numRootsB; otherwise, garbage will be broadcasted.
206820662ed9SBarry Smith      Example (comm size = 1):
206920662ed9SBarry Smith      sfA: 0 <- (0, 0)
207020662ed9SBarry Smith      sfB: 100 <- (0, 0)
207120662ed9SBarry Smith           101 <- (0, 1)
207220662ed9SBarry Smith      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget
207320662ed9SBarry Smith      of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would
207420662ed9SBarry Smith      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
207520662ed9SBarry Smith      remotePointsA; if not recasted, point 101 would receive a garbage value.             */
20769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
207754729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
207854729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
207954729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
208054729392SStefano Zampini   }
208154729392SStefano Zampini   for (i = 0; i < numLeavesA; i++) {
20820ea77edaSksagiyam     PetscInt localp = localPointsA ? localPointsA[i] : i;
20830ea77edaSksagiyam 
20840ea77edaSksagiyam     if (localp >= numRootsB) continue;
20850ea77edaSksagiyam     reorderedRemotePointsA[localp] = remotePointsA[i];
208654729392SStefano Zampini   }
2087d41018fbSJunchao Zhang   remotePointsA = reorderedRemotePointsA;
20889566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
20899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
20900ea77edaSksagiyam   for (i = 0; i < maxleaf - minleaf + 1; i++) {
20910ea77edaSksagiyam     leafdataB[i].rank  = -1;
20920ea77edaSksagiyam     leafdataB[i].index = -1;
20930ea77edaSksagiyam   }
20946497c311SBarry Smith   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20956497c311SBarry Smith   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20969566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
2097d41018fbSJunchao Zhang 
209854729392SStefano Zampini   denseB = (PetscBool)!localPointsB;
209954729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
210054729392SStefano Zampini     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
210154729392SStefano Zampini     else numLeavesBA++;
210254729392SStefano Zampini   }
210354729392SStefano Zampini   if (denseB) {
2104d41018fbSJunchao Zhang     localPointsBA  = NULL;
2105d41018fbSJunchao Zhang     remotePointsBA = leafdataB;
2106d41018fbSJunchao Zhang   } else {
21079566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
21089566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
210954729392SStefano Zampini     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
211054729392SStefano Zampini       const PetscInt l = localPointsB ? localPointsB[i] : i;
211154729392SStefano Zampini 
211254729392SStefano Zampini       if (leafdataB[l - minleaf].rank == -1) continue;
211354729392SStefano Zampini       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
211454729392SStefano Zampini       localPointsBA[numLeavesBA]  = l;
211554729392SStefano Zampini       numLeavesBA++;
211654729392SStefano Zampini     }
21179566063dSJacob Faibussowitsch     PetscCall(PetscFree(leafdataB));
2118d41018fbSJunchao Zhang   }
21199566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
21209566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
21219566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
21223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2123a7b3aa13SAta Mesgarnejad }
21241c6ba672SJunchao Zhang 
212504c0ada0SJunchao Zhang /*@
2126cab54364SBarry Smith   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
212704c0ada0SJunchao Zhang 
212804c0ada0SJunchao Zhang   Input Parameters:
2129cab54364SBarry Smith + sfA - The first `PetscSF`
2130cab54364SBarry Smith - sfB - The second `PetscSF`
213104c0ada0SJunchao Zhang 
21322fe279fdSBarry Smith   Output Parameter:
2133cab54364SBarry Smith . sfBA - The composite `PetscSF`.
213404c0ada0SJunchao Zhang 
213504c0ada0SJunchao Zhang   Level: developer
213604c0ada0SJunchao Zhang 
213754729392SStefano Zampini   Notes:
213820662ed9SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
213954729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
214020662ed9SBarry Smith   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
214154729392SStefano Zampini 
214220662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
214320662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
214420662ed9SBarry Smith   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
214520662ed9SBarry Smith   on `sfA`, then
214620662ed9SBarry Smith   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
214754729392SStefano Zampini 
2148db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
214904c0ada0SJunchao Zhang @*/
2150d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2151d71ae5a4SJacob Faibussowitsch {
215204c0ada0SJunchao Zhang   const PetscSFNode *remotePointsA, *remotePointsB;
215304c0ada0SJunchao Zhang   PetscSFNode       *remotePointsBA;
215404c0ada0SJunchao Zhang   const PetscInt    *localPointsA, *localPointsB;
215554729392SStefano Zampini   PetscSFNode       *reorderedRemotePointsA = NULL;
215654729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
21575b0d146aSStefano Zampini   MPI_Op             op;
21585b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21595b0d146aSStefano Zampini   PetscBool iswin;
21605b0d146aSStefano Zampini #endif
216104c0ada0SJunchao Zhang 
216204c0ada0SJunchao Zhang   PetscFunctionBegin;
216304c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
216404c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfA, 1);
216504c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
216604c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfB, 2);
216754729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
21684f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
21699566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
21709566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
217154729392SStefano Zampini 
21729566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
21739566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
21745b0d146aSStefano Zampini 
21755b0d146aSStefano Zampini   /* TODO: Check roots of sfB have degree of 1 */
21765b0d146aSStefano Zampini   /* Once we implement it, we can replace the MPI_MAXLOC
217783df288dSJunchao Zhang      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
21785b0d146aSStefano Zampini      We use MPI_MAXLOC only to have a deterministic output from this routine if
21795b0d146aSStefano Zampini      the root condition is not meet.
21805b0d146aSStefano Zampini    */
21815b0d146aSStefano Zampini   op = MPI_MAXLOC;
21825b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21835b0d146aSStefano Zampini   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
21849566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
218583df288dSJunchao Zhang   if (iswin) op = MPI_REPLACE;
21865b0d146aSStefano Zampini #endif
21875b0d146aSStefano Zampini 
21889566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
21899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
219054729392SStefano Zampini   for (i = 0; i < maxleaf - minleaf + 1; i++) {
219154729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
219254729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
219354729392SStefano Zampini   }
219454729392SStefano Zampini   if (localPointsA) {
219554729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
219654729392SStefano Zampini       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
219754729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
219854729392SStefano Zampini     }
219954729392SStefano Zampini   } else {
220054729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
220154729392SStefano Zampini       if (i > maxleaf || i < minleaf) continue;
220254729392SStefano Zampini       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
220354729392SStefano Zampini     }
220454729392SStefano Zampini   }
220554729392SStefano Zampini 
22069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
22079566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
220854729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
220954729392SStefano Zampini     remotePointsBA[i].rank  = -1;
221054729392SStefano Zampini     remotePointsBA[i].index = -1;
221154729392SStefano Zampini   }
221254729392SStefano Zampini 
22136497c311SBarry Smith   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22146497c311SBarry Smith   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22159566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
221654729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
221754729392SStefano Zampini     if (remotePointsBA[i].rank == -1) continue;
221854729392SStefano Zampini     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
221954729392SStefano Zampini     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
222054729392SStefano Zampini     localPointsBA[numLeavesBA]        = i;
222154729392SStefano Zampini     numLeavesBA++;
222254729392SStefano Zampini   }
22239566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
22249566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
22259566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
22263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
222704c0ada0SJunchao Zhang }
222804c0ada0SJunchao Zhang 
22291c6ba672SJunchao Zhang /*
2230cab54364SBarry Smith   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
22311c6ba672SJunchao Zhang 
22322fe279fdSBarry Smith   Input Parameter:
2233cab54364SBarry Smith . sf - The global `PetscSF`
22341c6ba672SJunchao Zhang 
22352fe279fdSBarry Smith   Output Parameter:
2236cab54364SBarry Smith . out - The local `PetscSF`
2237cab54364SBarry Smith 
2238cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`
22391c6ba672SJunchao Zhang  */
2240d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2241d71ae5a4SJacob Faibussowitsch {
22421c6ba672SJunchao Zhang   MPI_Comm           comm;
22431c6ba672SJunchao Zhang   PetscMPIInt        myrank;
22441c6ba672SJunchao Zhang   const PetscInt    *ilocal;
22451c6ba672SJunchao Zhang   const PetscSFNode *iremote;
22461c6ba672SJunchao Zhang   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
22471c6ba672SJunchao Zhang   PetscSFNode       *liremote;
22481c6ba672SJunchao Zhang   PetscSF            lsf;
22491c6ba672SJunchao Zhang 
22501c6ba672SJunchao Zhang   PetscFunctionBegin;
22511c6ba672SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2252dbbe0bcdSBarry Smith   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2253dbbe0bcdSBarry Smith   else {
2254835f2295SStefano Zampini     PetscMPIInt irank;
2255835f2295SStefano Zampini 
22561c6ba672SJunchao Zhang     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
22579566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
22589566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
22591c6ba672SJunchao Zhang 
22601c6ba672SJunchao Zhang     /* Find out local edges and build a local SF */
22619566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
22629371c9d4SSatish Balay     for (i = lnleaves = 0; i < nleaves; i++) {
2263835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2264835f2295SStefano Zampini       if (irank == myrank) lnleaves++;
22659371c9d4SSatish Balay     }
22669566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &lilocal));
22679566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &liremote));
22681c6ba672SJunchao Zhang 
22691c6ba672SJunchao Zhang     for (i = j = 0; i < nleaves; i++) {
2270835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2271835f2295SStefano Zampini       if (irank == myrank) {
22721c6ba672SJunchao Zhang         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
22731c6ba672SJunchao Zhang         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
22741c6ba672SJunchao Zhang         liremote[j].index = iremote[i].index;
22751c6ba672SJunchao Zhang         j++;
22761c6ba672SJunchao Zhang       }
22771c6ba672SJunchao Zhang     }
22789566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
22799566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(lsf));
22809566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
22819566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(lsf));
22821c6ba672SJunchao Zhang     *out = lsf;
22831c6ba672SJunchao Zhang   }
22843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22851c6ba672SJunchao Zhang }
2286dd5b3ca6SJunchao Zhang 
2287dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2288d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2289d71ae5a4SJacob Faibussowitsch {
2290eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
2291dd5b3ca6SJunchao Zhang 
2292dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
2293dd5b3ca6SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
22949566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
22959566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
22969566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
22979566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2298dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
22999566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
23003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2301dd5b3ca6SJunchao Zhang }
2302dd5b3ca6SJunchao Zhang 
2303157edd7aSVaclav Hapla /*@
2304cab54364SBarry Smith   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2305157edd7aSVaclav Hapla 
2306157edd7aSVaclav Hapla   Input Parameters:
2307157edd7aSVaclav Hapla + comm        - the communicator
2308cab54364SBarry Smith . nsfs        - the number of input `PetscSF`
2309cab54364SBarry Smith . sfs         - the array of input `PetscSF`
23101f40158dSVaclav Hapla . rootMode    - the root mode specifying how roots are handled
231120662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2312157edd7aSVaclav Hapla 
23132fe279fdSBarry Smith   Output Parameter:
2314cab54364SBarry Smith . newsf - The resulting `PetscSF`
2315157edd7aSVaclav Hapla 
23161f40158dSVaclav Hapla   Level: advanced
2317157edd7aSVaclav Hapla 
2318157edd7aSVaclav Hapla   Notes:
231920662ed9SBarry Smith   The communicator of all `PetscSF`s in `sfs` must be comm.
2320157edd7aSVaclav Hapla 
232120662ed9SBarry Smith   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
232220662ed9SBarry Smith 
232320662ed9SBarry Smith   The offsets in `leafOffsets` are added to the original leaf indices.
232420662ed9SBarry Smith 
232520662ed9SBarry Smith   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
232620662ed9SBarry Smith   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
232720662ed9SBarry Smith 
232820662ed9SBarry Smith   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2329157edd7aSVaclav Hapla   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2330157edd7aSVaclav Hapla 
233120662ed9SBarry Smith   All root modes retain the essential connectivity condition.
233220662ed9SBarry Smith   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
233320662ed9SBarry Smith   Parameter `rootMode` controls how the input root spaces are combined.
233420662ed9SBarry Smith   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
233520662ed9SBarry Smith   and is also the same in the output `PetscSF`.
23361f40158dSVaclav Hapla   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
23371f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
233820662ed9SBarry Smith   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
23391f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
23401593df67SStefano Zampini   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
23411f40158dSVaclav Hapla   the original root ranks are ignored.
23421f40158dSVaclav Hapla   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
234320662ed9SBarry Smith   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
234420662ed9SBarry Smith   to keep the load balancing.
234520662ed9SBarry Smith   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
23461f40158dSVaclav Hapla 
23471f40158dSVaclav Hapla   Example:
23481f40158dSVaclav Hapla   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
234920662ed9SBarry Smith .vb
235020662ed9SBarry Smith   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
235120662ed9SBarry Smith   for m in {local,global,shared}; do
235220662ed9SBarry Smith     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
235320662ed9SBarry Smith   done
235420662ed9SBarry Smith .ve
235520662ed9SBarry Smith   we generate two identical `PetscSF`s sf_0 and sf_1,
235620662ed9SBarry Smith .vb
235720662ed9SBarry Smith   PetscSF Object: sf_0 2 MPI processes
235820662ed9SBarry Smith     type: basic
235920662ed9SBarry Smith     rank #leaves #roots
236020662ed9SBarry Smith     [ 0]       4      2
236120662ed9SBarry Smith     [ 1]       4      2
236220662ed9SBarry Smith     leaves      roots       roots in global numbering
236320662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
236420662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
236520662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
236620662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
236720662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
236820662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
236920662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
237020662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
237120662ed9SBarry Smith .ve
2372e33f79d8SJacob Faibussowitsch   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
237320662ed9SBarry Smith .vb
237420662ed9SBarry Smith   rootMode = local:
237520662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
237620662ed9SBarry Smith     type: basic
237720662ed9SBarry Smith     rank #leaves #roots
237820662ed9SBarry Smith     [ 0]       8      4
237920662ed9SBarry Smith     [ 1]       8      4
238020662ed9SBarry Smith     leaves      roots       roots in global numbering
238120662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
238220662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
238320662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   4
238420662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   5
238520662ed9SBarry Smith     ( 0,  4) <- ( 0,  2)  =   2
238620662ed9SBarry Smith     ( 0,  5) <- ( 0,  3)  =   3
238720662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
238820662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
238920662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
239020662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
239120662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   4
239220662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   5
239320662ed9SBarry Smith     ( 1,  4) <- ( 0,  2)  =   2
239420662ed9SBarry Smith     ( 1,  5) <- ( 0,  3)  =   3
239520662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
239620662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
239720662ed9SBarry Smith 
239820662ed9SBarry Smith   rootMode = global:
239920662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
240020662ed9SBarry Smith     type: basic
240120662ed9SBarry Smith     rank #leaves #roots
240220662ed9SBarry Smith     [ 0]       8      4
240320662ed9SBarry Smith     [ 1]       8      4
240420662ed9SBarry Smith     leaves      roots       roots in global numbering
240520662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
240620662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
240720662ed9SBarry Smith     ( 0,  2) <- ( 0,  2)  =   2
240820662ed9SBarry Smith     ( 0,  3) <- ( 0,  3)  =   3
240920662ed9SBarry Smith     ( 0,  4) <- ( 1,  0)  =   4
241020662ed9SBarry Smith     ( 0,  5) <- ( 1,  1)  =   5
241120662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
241220662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
241320662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
241420662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
241520662ed9SBarry Smith     ( 1,  2) <- ( 0,  2)  =   2
241620662ed9SBarry Smith     ( 1,  3) <- ( 0,  3)  =   3
241720662ed9SBarry Smith     ( 1,  4) <- ( 1,  0)  =   4
241820662ed9SBarry Smith     ( 1,  5) <- ( 1,  1)  =   5
241920662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
242020662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
242120662ed9SBarry Smith 
242220662ed9SBarry Smith   rootMode = shared:
242320662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
242420662ed9SBarry Smith     type: basic
242520662ed9SBarry Smith     rank #leaves #roots
242620662ed9SBarry Smith     [ 0]       8      2
242720662ed9SBarry Smith     [ 1]       8      2
242820662ed9SBarry Smith     leaves      roots       roots in global numbering
242920662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
243020662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
243120662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
243220662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
243320662ed9SBarry Smith     ( 0,  4) <- ( 0,  0)  =   0
243420662ed9SBarry Smith     ( 0,  5) <- ( 0,  1)  =   1
243520662ed9SBarry Smith     ( 0,  6) <- ( 1,  0)  =   2
243620662ed9SBarry Smith     ( 0,  7) <- ( 1,  1)  =   3
243720662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
243820662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
243920662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
244020662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
244120662ed9SBarry Smith     ( 1,  4) <- ( 0,  0)  =   0
244220662ed9SBarry Smith     ( 1,  5) <- ( 0,  1)  =   1
244320662ed9SBarry Smith     ( 1,  6) <- ( 1,  0)  =   2
244420662ed9SBarry Smith     ( 1,  7) <- ( 1,  1)  =   3
244520662ed9SBarry Smith .ve
24461f40158dSVaclav Hapla 
24471f40158dSVaclav Hapla .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2448157edd7aSVaclav Hapla @*/
24491f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2450d71ae5a4SJacob Faibussowitsch {
2451157edd7aSVaclav Hapla   PetscInt     i, s, nLeaves, nRoots;
2452157edd7aSVaclav Hapla   PetscInt    *leafArrayOffsets;
2453157edd7aSVaclav Hapla   PetscInt    *ilocal_new;
2454157edd7aSVaclav Hapla   PetscSFNode *iremote_new;
2455157edd7aSVaclav Hapla   PetscBool    all_ilocal_null = PETSC_FALSE;
24561f40158dSVaclav Hapla   PetscLayout  glayout         = NULL;
24571f40158dSVaclav Hapla   PetscInt    *gremote         = NULL;
24581f40158dSVaclav Hapla   PetscMPIInt  rank, size;
2459157edd7aSVaclav Hapla 
2460157edd7aSVaclav Hapla   PetscFunctionBegin;
246112f479c1SVaclav Hapla   if (PetscDefined(USE_DEBUG)) {
2462157edd7aSVaclav Hapla     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2463157edd7aSVaclav Hapla 
24649566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, &dummy));
2465157edd7aSVaclav Hapla     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
24664f572ea9SToby Isaac     PetscAssertPointer(sfs, 3);
2467157edd7aSVaclav Hapla     for (i = 0; i < nsfs; i++) {
2468157edd7aSVaclav Hapla       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2469157edd7aSVaclav Hapla       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2470157edd7aSVaclav Hapla     }
24711f40158dSVaclav Hapla     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
24724f572ea9SToby Isaac     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
24734f572ea9SToby Isaac     PetscAssertPointer(newsf, 6);
24749566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&dummy));
2475157edd7aSVaclav Hapla   }
2476157edd7aSVaclav Hapla   if (!nsfs) {
24779566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
24789566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
24793ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2480157edd7aSVaclav Hapla   }
24819566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
24821f40158dSVaclav Hapla   PetscCallMPI(MPI_Comm_size(comm, &size));
2483157edd7aSVaclav Hapla 
24841f40158dSVaclav Hapla   /* Calculate leaf array offsets */
24859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2486157edd7aSVaclav Hapla   leafArrayOffsets[0] = 0;
2487157edd7aSVaclav Hapla   for (s = 0; s < nsfs; s++) {
2488157edd7aSVaclav Hapla     PetscInt nl;
2489157edd7aSVaclav Hapla 
24909566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2491157edd7aSVaclav Hapla     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2492157edd7aSVaclav Hapla   }
2493157edd7aSVaclav Hapla   nLeaves = leafArrayOffsets[nsfs];
2494157edd7aSVaclav Hapla 
24951f40158dSVaclav Hapla   /* Calculate number of roots */
24961f40158dSVaclav Hapla   switch (rootMode) {
24971f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
24981f40158dSVaclav Hapla     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
24991f40158dSVaclav Hapla     if (PetscDefined(USE_DEBUG)) {
25001f40158dSVaclav Hapla       for (s = 1; s < nsfs; s++) {
25011f40158dSVaclav Hapla         PetscInt nr;
25021f40158dSVaclav Hapla 
25031f40158dSVaclav Hapla         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
25041f40158dSVaclav Hapla         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
25051f40158dSVaclav Hapla       }
25061f40158dSVaclav Hapla     }
25071f40158dSVaclav Hapla   } break;
25081f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
25091f40158dSVaclav Hapla     /* Calculate also global layout in this case */
25101f40158dSVaclav Hapla     PetscInt    *nls;
25111f40158dSVaclav Hapla     PetscLayout *lts;
25121f40158dSVaclav Hapla     PetscInt   **inds;
25131f40158dSVaclav Hapla     PetscInt     j;
25141f40158dSVaclav Hapla     PetscInt     rootOffset = 0;
25151f40158dSVaclav Hapla 
25161f40158dSVaclav Hapla     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
25171f40158dSVaclav Hapla     PetscCall(PetscLayoutCreate(comm, &glayout));
25181f40158dSVaclav Hapla     glayout->bs = 1;
25191f40158dSVaclav Hapla     glayout->n  = 0;
25201f40158dSVaclav Hapla     glayout->N  = 0;
25211f40158dSVaclav Hapla     for (s = 0; s < nsfs; s++) {
25221f40158dSVaclav Hapla       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
25231f40158dSVaclav Hapla       glayout->n += lts[s]->n;
25241f40158dSVaclav Hapla       glayout->N += lts[s]->N;
25251f40158dSVaclav Hapla     }
25261f40158dSVaclav Hapla     PetscCall(PetscLayoutSetUp(glayout));
25271f40158dSVaclav Hapla     PetscCall(PetscMalloc1(nLeaves, &gremote));
25281f40158dSVaclav Hapla     for (s = 0, j = 0; s < nsfs; s++) {
25291f40158dSVaclav Hapla       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
25301f40158dSVaclav Hapla       rootOffset += lts[s]->N;
25311f40158dSVaclav Hapla       PetscCall(PetscLayoutDestroy(&lts[s]));
25321f40158dSVaclav Hapla       PetscCall(PetscFree(inds[s]));
25331f40158dSVaclav Hapla     }
25341f40158dSVaclav Hapla     PetscCall(PetscFree3(lts, nls, inds));
25351f40158dSVaclav Hapla     nRoots = glayout->N;
25361f40158dSVaclav Hapla   } break;
25371f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
25381f40158dSVaclav Hapla     /* nRoots calculated later in this case */
25391f40158dSVaclav Hapla     break;
25401f40158dSVaclav Hapla   default:
25411f40158dSVaclav Hapla     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
25421f40158dSVaclav Hapla   }
25431f40158dSVaclav Hapla 
2544157edd7aSVaclav Hapla   if (!leafOffsets) {
2545157edd7aSVaclav Hapla     all_ilocal_null = PETSC_TRUE;
2546157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2547157edd7aSVaclav Hapla       const PetscInt *ilocal;
2548157edd7aSVaclav Hapla 
25499566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2550157edd7aSVaclav Hapla       if (ilocal) {
2551157edd7aSVaclav Hapla         all_ilocal_null = PETSC_FALSE;
2552157edd7aSVaclav Hapla         break;
2553157edd7aSVaclav Hapla       }
2554157edd7aSVaclav Hapla     }
2555157edd7aSVaclav Hapla     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2556157edd7aSVaclav Hapla   }
2557157edd7aSVaclav Hapla 
2558157edd7aSVaclav Hapla   /* Renumber and concatenate local leaves */
2559157edd7aSVaclav Hapla   ilocal_new = NULL;
2560157edd7aSVaclav Hapla   if (!all_ilocal_null) {
25619566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2562157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2563157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2564157edd7aSVaclav Hapla       const PetscInt *ilocal;
25658e3a54c0SPierre Jolivet       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2566157edd7aSVaclav Hapla       PetscInt        i, nleaves_l;
2567157edd7aSVaclav Hapla 
25689566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2569157edd7aSVaclav Hapla       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2570157edd7aSVaclav Hapla     }
2571157edd7aSVaclav Hapla   }
2572157edd7aSVaclav Hapla 
2573157edd7aSVaclav Hapla   /* Renumber and concatenate remote roots */
25741f40158dSVaclav Hapla   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
25751f40158dSVaclav Hapla     PetscInt rootOffset = 0;
25761f40158dSVaclav Hapla 
25779566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2578157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) {
2579157edd7aSVaclav Hapla       iremote_new[i].rank  = -1;
2580157edd7aSVaclav Hapla       iremote_new[i].index = -1;
2581157edd7aSVaclav Hapla     }
2582157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2583157edd7aSVaclav Hapla       PetscInt           i, nl, nr;
2584157edd7aSVaclav Hapla       PetscSF            tmp_sf;
2585157edd7aSVaclav Hapla       const PetscSFNode *iremote;
2586157edd7aSVaclav Hapla       PetscSFNode       *tmp_rootdata;
25878e3a54c0SPierre Jolivet       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2588157edd7aSVaclav Hapla 
25899566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
25909566063dSJacob Faibussowitsch       PetscCall(PetscSFCreate(comm, &tmp_sf));
2591157edd7aSVaclav Hapla       /* create helper SF with contiguous leaves */
25929566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
25939566063dSJacob Faibussowitsch       PetscCall(PetscSFSetUp(tmp_sf));
25949566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
25951f40158dSVaclav Hapla       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2596157edd7aSVaclav Hapla         for (i = 0; i < nr; i++) {
25971f40158dSVaclav Hapla           tmp_rootdata[i].index = i + rootOffset;
25986497c311SBarry Smith           tmp_rootdata[i].rank  = rank;
2599157edd7aSVaclav Hapla         }
26001f40158dSVaclav Hapla         rootOffset += nr;
26011f40158dSVaclav Hapla       } else {
26021f40158dSVaclav Hapla         for (i = 0; i < nr; i++) {
26031f40158dSVaclav Hapla           tmp_rootdata[i].index = i;
26046497c311SBarry Smith           tmp_rootdata[i].rank  = rank;
26051f40158dSVaclav Hapla         }
26061f40158dSVaclav Hapla       }
26076497c311SBarry Smith       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26086497c311SBarry Smith       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26099566063dSJacob Faibussowitsch       PetscCall(PetscSFDestroy(&tmp_sf));
26109566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp_rootdata));
2611157edd7aSVaclav Hapla     }
2612aa624791SPierre Jolivet     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2613157edd7aSVaclav Hapla 
2614157edd7aSVaclav Hapla     /* Build the new SF */
26159566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
26169566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
26171f40158dSVaclav Hapla   } else {
26181f40158dSVaclav Hapla     /* Build the new SF */
26191f40158dSVaclav Hapla     PetscCall(PetscSFCreate(comm, newsf));
26201f40158dSVaclav Hapla     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
26211f40158dSVaclav Hapla   }
26229566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(*newsf));
26231f40158dSVaclav Hapla   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
26241f40158dSVaclav Hapla   PetscCall(PetscLayoutDestroy(&glayout));
26251f40158dSVaclav Hapla   PetscCall(PetscFree(gremote));
26269566063dSJacob Faibussowitsch   PetscCall(PetscFree(leafArrayOffsets));
26273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2628157edd7aSVaclav Hapla }
26298e54d7e8SToby Isaac 
26308e54d7e8SToby Isaac /*@
26318e54d7e8SToby Isaac   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
26328e54d7e8SToby Isaac 
26338e54d7e8SToby Isaac   Collective
26348e54d7e8SToby Isaac 
26358e54d7e8SToby Isaac   Input Parameters:
26368e54d7e8SToby Isaac + sf       - star forest
26378e54d7e8SToby Isaac . unit     - the data type contained within the root and leaf data
2638d7c1f440SPierre Jolivet . rootdata - root data that will be used for multiple PetscSF communications
2639d7c1f440SPierre Jolivet - leafdata - leaf data that will be used for multiple PetscSF communications
26408e54d7e8SToby Isaac 
26418e54d7e8SToby Isaac   Level: advanced
26428e54d7e8SToby Isaac 
26438e54d7e8SToby Isaac   Notes:
26448e54d7e8SToby Isaac   Implementations of `PetscSF` can make optimizations
26458e54d7e8SToby Isaac   for repeated communication using the same memory regions, but these optimizations
26468e54d7e8SToby Isaac   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
26478e54d7e8SToby Isaac   The intended pattern is
26488e54d7e8SToby Isaac 
26498e54d7e8SToby Isaac .vb
26508e54d7e8SToby Isaac   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
26518e54d7e8SToby Isaac 
26528e54d7e8SToby Isaac   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
26538e54d7e8SToby Isaac   // repeated use of rootdata and leafdata will now be optimized
26548e54d7e8SToby Isaac 
26558e54d7e8SToby Isaac   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
26568e54d7e8SToby Isaac   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
26578e54d7e8SToby Isaac   // ...
26588e54d7e8SToby Isaac   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
26598e54d7e8SToby Isaac   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
26608e54d7e8SToby Isaac   // ... (other communications)
26618e54d7e8SToby Isaac 
26628e54d7e8SToby Isaac   // rootdata and leafdata must be deregistered before freeing
26638e54d7e8SToby Isaac   // skipping this can lead to undefined behavior including
26648e54d7e8SToby Isaac   // deadlocks
26658e54d7e8SToby Isaac   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
26668e54d7e8SToby Isaac 
26678e54d7e8SToby Isaac   // it is now safe to free rootdata and leafdata
26688e54d7e8SToby Isaac   PetscFree2(rootdata, leafdata);
26698e54d7e8SToby Isaac .ve
26708e54d7e8SToby Isaac 
26718e54d7e8SToby Isaac   If you do not register `rootdata` and `leafdata` it will not cause an error,
26728e54d7e8SToby Isaac   but optimizations that reduce the setup time for each communication cannot be
26738e54d7e8SToby Isaac   made.  Currently, the only implementation of `PetscSF` that benefits from
26748e54d7e8SToby Isaac   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
26758e54d7e8SToby Isaac   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
26768e54d7e8SToby Isaac 
26778e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
26788e54d7e8SToby Isaac @*/
26798e54d7e8SToby Isaac PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
26808e54d7e8SToby Isaac {
26818e54d7e8SToby Isaac   PetscFunctionBegin;
26828e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
26838e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
26848e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
26858e54d7e8SToby Isaac }
26868e54d7e8SToby Isaac 
26878e54d7e8SToby Isaac /*@
26888e54d7e8SToby Isaac   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
26898e54d7e8SToby Isaac 
26908e54d7e8SToby Isaac   Collective
26918e54d7e8SToby Isaac 
26928e54d7e8SToby Isaac   Input Parameters:
26938e54d7e8SToby Isaac + sf       - star forest
26948e54d7e8SToby Isaac . unit     - the data type contained within the root and leaf data
26958e54d7e8SToby Isaac . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
26968e54d7e8SToby Isaac - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
26978e54d7e8SToby Isaac 
26988e54d7e8SToby Isaac   Level: advanced
26998e54d7e8SToby Isaac 
27008e54d7e8SToby Isaac   Note:
27018e54d7e8SToby Isaac   See `PetscSFRegisterPersistent()` for when/how to use this function.
27028e54d7e8SToby Isaac 
27038e54d7e8SToby Isaac .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
27048e54d7e8SToby Isaac @*/
27058e54d7e8SToby Isaac PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
27068e54d7e8SToby Isaac {
27078e54d7e8SToby Isaac   PetscFunctionBegin;
27088e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
27098e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
27108e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
27118e54d7e8SToby Isaac }
2712e1187f0dSToby Isaac 
2713e1187f0dSToby Isaac PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2714e1187f0dSToby Isaac {
2715e1187f0dSToby Isaac   MPI_Aint lb, lb_true, bytes, bytes_true;
2716e1187f0dSToby Isaac 
2717e1187f0dSToby Isaac   PetscFunctionBegin;
2718e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2719e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2720e1187f0dSToby Isaac   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2721e1187f0dSToby Isaac   *size = bytes;
2722e1187f0dSToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
2723e1187f0dSToby Isaac }
2724