xref: /petsc/src/vec/is/sf/interface/sf.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
495fce210SBarry Smith #include <petscctable.h>
595fce210SBarry Smith 
67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
77fd2d3dbSJunchao Zhang   #include <cuda_runtime.h>
87fd2d3dbSJunchao Zhang #endif
97fd2d3dbSJunchao Zhang 
107fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
117fd2d3dbSJunchao Zhang   #include <hip/hip_runtime.h>
127fd2d3dbSJunchao Zhang #endif
137fd2d3dbSJunchao Zhang 
142abc8c78SJacob Faibussowitsch #if defined(PETSC_CLANG_STATIC_ANALYZER)
152abc8c78SJacob Faibussowitsch void PetscSFCheckGraphSet(PetscSF, int);
162abc8c78SJacob Faibussowitsch #else
1795fce210SBarry Smith   #if defined(PETSC_USE_DEBUG)
187a46b595SBarry Smith     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME);
1995fce210SBarry Smith   #else
209371c9d4SSatish Balay     #define PetscSFCheckGraphSet(sf, arg) \
219371c9d4SSatish Balay       do { \
229371c9d4SSatish Balay       } while (0)
2395fce210SBarry Smith   #endif
242abc8c78SJacob Faibussowitsch #endif
2595fce210SBarry Smith 
264c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
2795fce210SBarry Smith 
288af6ec1cSBarry Smith /*@
2995fce210SBarry Smith    PetscSFCreate - create a star forest communication context
3095fce210SBarry Smith 
31d083f849SBarry Smith    Collective
3295fce210SBarry Smith 
334165533cSJose E. Roman    Input Parameter:
3495fce210SBarry Smith .  comm - communicator on which the star forest will operate
3595fce210SBarry Smith 
364165533cSJose E. Roman    Output Parameter:
3795fce210SBarry Smith .  sf - new star forest context
3895fce210SBarry Smith 
39dd5b3ca6SJunchao Zhang    Options Database Keys:
40cab54364SBarry Smith .  -sf_type type - value of type may be
41cab54364SBarry Smith .vb
42cab54364SBarry Smith     basic     -Use MPI persistent Isend/Irecv for communication (Default)
43cab54364SBarry Smith     window    -Use MPI-3 one-sided window for communication
44cab54364SBarry Smith     neighbor  -Use MPI-3 neighborhood collectives for communication
45cab54364SBarry Smith .ve
46dd5b3ca6SJunchao Zhang 
4795fce210SBarry Smith    Level: intermediate
4895fce210SBarry Smith 
49cab54364SBarry Smith    Note:
50cab54364SBarry Smith    When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
51cab54364SBarry Smith    `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
52cab54364SBarry Smith    `SF`s are optimized and they have better performance than general `SF`s.
53dd5b3ca6SJunchao Zhang 
54cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
5595fce210SBarry Smith @*/
56d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57d71ae5a4SJacob Faibussowitsch {
5895fce210SBarry Smith   PetscSF b;
5995fce210SBarry Smith 
6095fce210SBarry Smith   PetscFunctionBegin;
6195fce210SBarry Smith   PetscValidPointer(sf, 2);
629566063dSJacob Faibussowitsch   PetscCall(PetscSFInitializePackage());
6395fce210SBarry Smith 
649566063dSJacob Faibussowitsch   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
6595fce210SBarry Smith 
6695fce210SBarry Smith   b->nroots    = -1;
6795fce210SBarry Smith   b->nleaves   = -1;
6829046d53SLisandro Dalcin   b->minleaf   = PETSC_MAX_INT;
6929046d53SLisandro Dalcin   b->maxleaf   = PETSC_MIN_INT;
7095fce210SBarry Smith   b->nranks    = -1;
7195fce210SBarry Smith   b->rankorder = PETSC_TRUE;
7295fce210SBarry Smith   b->ingroup   = MPI_GROUP_NULL;
7395fce210SBarry Smith   b->outgroup  = MPI_GROUP_NULL;
7495fce210SBarry Smith   b->graphset  = PETSC_FALSE;
7520c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
7620c24465SJunchao Zhang   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
7720c24465SJunchao Zhang   b->use_stream_aware_mpi = PETSC_FALSE;
7871438e86SJunchao Zhang   b->unknown_input_stream = PETSC_FALSE;
7927f636e8SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
8020c24465SJunchao Zhang   b->backend = PETSCSF_BACKEND_KOKKOS;
8127f636e8SJunchao Zhang   #elif defined(PETSC_HAVE_CUDA)
8227f636e8SJunchao Zhang   b->backend = PETSCSF_BACKEND_CUDA;
8359af0bd3SScott Kruger   #elif defined(PETSC_HAVE_HIP)
8459af0bd3SScott Kruger   b->backend = PETSCSF_BACKEND_HIP;
8520c24465SJunchao Zhang   #endif
8671438e86SJunchao Zhang 
8771438e86SJunchao Zhang   #if defined(PETSC_HAVE_NVSHMEM)
8871438e86SJunchao Zhang   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
8971438e86SJunchao Zhang   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
909566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
919566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
9271438e86SJunchao Zhang   #endif
9320c24465SJunchao Zhang #endif
9460c22052SBarry Smith   b->vscat.from_n = -1;
9560c22052SBarry Smith   b->vscat.to_n   = -1;
9660c22052SBarry Smith   b->vscat.unit   = MPIU_SCALAR;
9795fce210SBarry Smith   *sf             = b;
9895fce210SBarry Smith   PetscFunctionReturn(0);
9995fce210SBarry Smith }
10095fce210SBarry Smith 
10129046d53SLisandro Dalcin /*@
10295fce210SBarry Smith    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
10395fce210SBarry Smith 
10495fce210SBarry Smith    Collective
10595fce210SBarry Smith 
1064165533cSJose E. Roman    Input Parameter:
10795fce210SBarry Smith .  sf - star forest
10895fce210SBarry Smith 
10995fce210SBarry Smith    Level: advanced
11095fce210SBarry Smith 
111cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
11295fce210SBarry Smith @*/
113d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf)
114d71ae5a4SJacob Faibussowitsch {
11595fce210SBarry Smith   PetscFunctionBegin;
11695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
117dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Reset);
11829046d53SLisandro Dalcin   sf->nroots   = -1;
11929046d53SLisandro Dalcin   sf->nleaves  = -1;
12029046d53SLisandro Dalcin   sf->minleaf  = PETSC_MAX_INT;
12129046d53SLisandro Dalcin   sf->maxleaf  = PETSC_MIN_INT;
12295fce210SBarry Smith   sf->mine     = NULL;
12395fce210SBarry Smith   sf->remote   = NULL;
12429046d53SLisandro Dalcin   sf->graphset = PETSC_FALSE;
1259566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->mine_alloc));
1269566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->remote_alloc));
12721c688dcSJed Brown   sf->nranks = -1;
1289566063dSJacob Faibussowitsch   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
12929046d53SLisandro Dalcin   sf->degreeknown = PETSC_FALSE;
1309566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->degree));
1319566063dSJacob Faibussowitsch   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
1329566063dSJacob Faibussowitsch   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
133013b3241SStefano Zampini   if (sf->multi) sf->multi->multi = NULL;
1349566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf->multi));
1359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sf->map));
13671438e86SJunchao Zhang 
13771438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
1389566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
13971438e86SJunchao Zhang #endif
14071438e86SJunchao Zhang 
14195fce210SBarry Smith   sf->setupcalled = PETSC_FALSE;
14295fce210SBarry Smith   PetscFunctionReturn(0);
14395fce210SBarry Smith }
14495fce210SBarry Smith 
14595fce210SBarry Smith /*@C
146cab54364SBarry Smith    PetscSFSetType - Set the `PetscSF` communication implementation
14795fce210SBarry Smith 
148cab54364SBarry Smith    Collective on sf
14995fce210SBarry Smith 
15095fce210SBarry Smith    Input Parameters:
151cab54364SBarry Smith +  sf - the `PetscSF` context
15295fce210SBarry Smith -  type - a known method
153cab54364SBarry Smith .vb
154cab54364SBarry Smith     PETSCSFWINDOW - MPI-2/3 one-sided
155cab54364SBarry Smith     PETSCSFBASIC - basic implementation using MPI-1 two-sided
156cab54364SBarry Smith .ve
15795fce210SBarry Smith 
15895fce210SBarry Smith    Options Database Key:
159cab54364SBarry Smith .  -sf_type <type> - Sets the method; for example basic or window use -help for a list of available methods (for instance, window, basic, neighbor)
160cab54364SBarry Smith 
161cab54364SBarry Smith   Level: intermediate
16295fce210SBarry Smith 
16395fce210SBarry Smith    Notes:
16495fce210SBarry Smith    See "include/petscsf.h" for available methods (for instance)
16595fce210SBarry Smith 
166db781477SPatrick Sanan .seealso: `PetscSFType`, `PetscSFCreate()`
16795fce210SBarry Smith @*/
168d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
169d71ae5a4SJacob Faibussowitsch {
17095fce210SBarry Smith   PetscBool match;
1715f80ce2aSJacob Faibussowitsch   PetscErrorCode (*r)(PetscSF);
17295fce210SBarry Smith 
17395fce210SBarry Smith   PetscFunctionBegin;
17495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17595fce210SBarry Smith   PetscValidCharPointer(type, 2);
17695fce210SBarry Smith 
1779566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
17895fce210SBarry Smith   if (match) PetscFunctionReturn(0);
17995fce210SBarry Smith 
1809566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
18128b400f6SJacob Faibussowitsch   PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
18229046d53SLisandro Dalcin   /* Destroy the previous PetscSF implementation context */
183dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Destroy);
1849566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
1859566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
1869566063dSJacob Faibussowitsch   PetscCall((*r)(sf));
18795fce210SBarry Smith   PetscFunctionReturn(0);
18895fce210SBarry Smith }
18995fce210SBarry Smith 
19029046d53SLisandro Dalcin /*@C
191cab54364SBarry Smith   PetscSFGetType - Get the `PetscSF` communication implementation
19229046d53SLisandro Dalcin 
19329046d53SLisandro Dalcin   Not Collective
19429046d53SLisandro Dalcin 
19529046d53SLisandro Dalcin   Input Parameter:
196cab54364SBarry Smith . sf  - the `PetscSF` context
19729046d53SLisandro Dalcin 
19829046d53SLisandro Dalcin   Output Parameter:
199cab54364SBarry Smith . type - the `PetscSF` type name
20029046d53SLisandro Dalcin 
20129046d53SLisandro Dalcin   Level: intermediate
20229046d53SLisandro Dalcin 
203cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
20429046d53SLisandro Dalcin @*/
205d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
206d71ae5a4SJacob Faibussowitsch {
20729046d53SLisandro Dalcin   PetscFunctionBegin;
20829046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
20929046d53SLisandro Dalcin   PetscValidPointer(type, 2);
21029046d53SLisandro Dalcin   *type = ((PetscObject)sf)->type_name;
21129046d53SLisandro Dalcin   PetscFunctionReturn(0);
21229046d53SLisandro Dalcin }
21329046d53SLisandro Dalcin 
2141fb7b255SJunchao Zhang /*@C
21595fce210SBarry Smith    PetscSFDestroy - destroy star forest
21695fce210SBarry Smith 
21795fce210SBarry Smith    Collective
21895fce210SBarry Smith 
2194165533cSJose E. Roman    Input Parameter:
22095fce210SBarry Smith .  sf - address of star forest
22195fce210SBarry Smith 
22295fce210SBarry Smith    Level: intermediate
22395fce210SBarry Smith 
224cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
22595fce210SBarry Smith @*/
226d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf)
227d71ae5a4SJacob Faibussowitsch {
22895fce210SBarry Smith   PetscFunctionBegin;
22995fce210SBarry Smith   if (!*sf) PetscFunctionReturn(0);
23095fce210SBarry Smith   PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
2319371c9d4SSatish Balay   if (--((PetscObject)(*sf))->refct > 0) {
2329371c9d4SSatish Balay     *sf = NULL;
2339371c9d4SSatish Balay     PetscFunctionReturn(0);
2349371c9d4SSatish Balay   }
2359566063dSJacob Faibussowitsch   PetscCall(PetscSFReset(*sf));
236dbbe0bcdSBarry Smith   PetscTryTypeMethod((*sf), Destroy);
2379566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
2389566063dSJacob Faibussowitsch   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
2399566063dSJacob Faibussowitsch   PetscCall(PetscHeaderDestroy(sf));
24095fce210SBarry Smith   PetscFunctionReturn(0);
24195fce210SBarry Smith }
24295fce210SBarry Smith 
243d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
244d71ae5a4SJacob Faibussowitsch {
245c4e6a40aSLawrence Mitchell   PetscInt           i, nleaves;
246c4e6a40aSLawrence Mitchell   PetscMPIInt        size;
247c4e6a40aSLawrence Mitchell   const PetscInt    *ilocal;
248c4e6a40aSLawrence Mitchell   const PetscSFNode *iremote;
249c4e6a40aSLawrence Mitchell 
250c4e6a40aSLawrence Mitchell   PetscFunctionBegin;
25176bd3646SJed Brown   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
2529566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
2539566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
254c4e6a40aSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
255c4e6a40aSLawrence Mitchell     const PetscInt rank   = iremote[i].rank;
256c4e6a40aSLawrence Mitchell     const PetscInt remote = iremote[i].index;
257c4e6a40aSLawrence Mitchell     const PetscInt leaf   = ilocal ? ilocal[i] : i;
258c9cc58a2SBarry Smith     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
25908401ef6SPierre Jolivet     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
26008401ef6SPierre Jolivet     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
261c4e6a40aSLawrence Mitchell   }
262c4e6a40aSLawrence Mitchell   PetscFunctionReturn(0);
263c4e6a40aSLawrence Mitchell }
264c4e6a40aSLawrence Mitchell 
26595fce210SBarry Smith /*@
26695fce210SBarry Smith    PetscSFSetUp - set up communication structures
26795fce210SBarry Smith 
26895fce210SBarry Smith    Collective
26995fce210SBarry Smith 
2704165533cSJose E. Roman    Input Parameter:
27195fce210SBarry Smith .  sf - star forest communication object
27295fce210SBarry Smith 
27395fce210SBarry Smith    Level: beginner
27495fce210SBarry Smith 
275cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
27695fce210SBarry Smith @*/
277d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf)
278d71ae5a4SJacob Faibussowitsch {
27995fce210SBarry Smith   PetscFunctionBegin;
28029046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
28129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
28295fce210SBarry Smith   if (sf->setupcalled) PetscFunctionReturn(0);
2839566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
2849566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckGraphValid_Private(sf));
2859566063dSJacob Faibussowitsch   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
286dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetUp);
28720c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
28820c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_CUDA) {
28971438e86SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_CUDA;
29071438e86SJunchao Zhang     sf->ops->Free   = PetscSFFree_CUDA;
29120c24465SJunchao Zhang   }
29220c24465SJunchao Zhang #endif
29359af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
29459af0bd3SScott Kruger   if (sf->backend == PETSCSF_BACKEND_HIP) {
29559af0bd3SScott Kruger     sf->ops->Malloc = PetscSFMalloc_HIP;
29659af0bd3SScott Kruger     sf->ops->Free   = PetscSFFree_HIP;
29759af0bd3SScott Kruger   }
29859af0bd3SScott Kruger #endif
29920c24465SJunchao Zhang 
30059af0bd3SScott Kruger #
30120c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
30220c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
30320c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Kokkos;
30420c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Kokkos;
30520c24465SJunchao Zhang   }
30620c24465SJunchao Zhang #endif
3079566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
30895fce210SBarry Smith   sf->setupcalled = PETSC_TRUE;
30995fce210SBarry Smith   PetscFunctionReturn(0);
31095fce210SBarry Smith }
31195fce210SBarry Smith 
3128af6ec1cSBarry Smith /*@
313cab54364SBarry Smith    PetscSFSetFromOptions - set `PetscSF` options using the options database
31495fce210SBarry Smith 
31595fce210SBarry Smith    Logically Collective
31695fce210SBarry Smith 
3174165533cSJose E. Roman    Input Parameter:
31895fce210SBarry Smith .  sf - star forest
31995fce210SBarry Smith 
32095fce210SBarry Smith    Options Database Keys:
32160263706SJed Brown +  -sf_type               - implementation type, see PetscSFSetType()
32251ccb202SJunchao Zhang .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
323b85e67b7SJunchao Zhang .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
324c2a741eeSJunchao Zhang                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
325c06a8e02SRichard Tran Mills                             If true, this option only works with -use_gpu_aware_mpi 1.
32620c24465SJunchao Zhang .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
327c06a8e02SRichard Tran Mills                                If true, this option only works with -use_gpu_aware_mpi 1.
32895fce210SBarry Smith 
32959af0bd3SScott Kruger -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
33059af0bd3SScott Kruger                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
33120c24465SJunchao Zhang                               the only available is kokkos.
33220c24465SJunchao Zhang 
33395fce210SBarry Smith    Level: intermediate
334cab54364SBarry Smith 
335cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
33695fce210SBarry Smith @*/
337d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
338d71ae5a4SJacob Faibussowitsch {
33995fce210SBarry Smith   PetscSFType deft;
34095fce210SBarry Smith   char        type[256];
34195fce210SBarry Smith   PetscBool   flg;
34295fce210SBarry Smith 
34395fce210SBarry Smith   PetscFunctionBegin;
34495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
345d0609cedSBarry Smith   PetscObjectOptionsBegin((PetscObject)sf);
34695fce210SBarry Smith   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
3479566063dSJacob Faibussowitsch   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
3489566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, flg ? type : deft));
3499566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
3507fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
35120c24465SJunchao Zhang   {
35220c24465SJunchao Zhang     char      backendstr[32] = {0};
35359af0bd3SScott Kruger     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
35420c24465SJunchao Zhang     /* Change the defaults set in PetscSFCreate() with command line options */
355*d5b43468SJose E. Roman     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
3569566063dSJacob Faibussowitsch     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
3579566063dSJacob Faibussowitsch     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
3589566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
3599566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
3609566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
36159af0bd3SScott Kruger   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
36220c24465SJunchao Zhang     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
36320c24465SJunchao Zhang     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
36459af0bd3SScott Kruger     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
36528b400f6SJacob Faibussowitsch     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
36620c24465SJunchao Zhang   #elif defined(PETSC_HAVE_KOKKOS)
36708401ef6SPierre Jolivet     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
36820c24465SJunchao Zhang   #endif
36920c24465SJunchao Zhang   }
370c2a741eeSJunchao Zhang #endif
371dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
372d0609cedSBarry Smith   PetscOptionsEnd();
37395fce210SBarry Smith   PetscFunctionReturn(0);
37495fce210SBarry Smith }
37595fce210SBarry Smith 
37629046d53SLisandro Dalcin /*@
37795fce210SBarry Smith    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
37895fce210SBarry Smith 
37995fce210SBarry Smith    Logically Collective
38095fce210SBarry Smith 
3814165533cSJose E. Roman    Input Parameters:
38295fce210SBarry Smith +  sf - star forest
383cab54364SBarry Smith -  flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
38495fce210SBarry Smith 
38595fce210SBarry Smith    Level: advanced
38695fce210SBarry Smith 
387cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
38895fce210SBarry Smith @*/
389d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
390d71ae5a4SJacob Faibussowitsch {
39195fce210SBarry Smith   PetscFunctionBegin;
39295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
39395fce210SBarry Smith   PetscValidLogicalCollectiveBool(sf, flg, 2);
39428b400f6SJacob Faibussowitsch   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
39595fce210SBarry Smith   sf->rankorder = flg;
39695fce210SBarry Smith   PetscFunctionReturn(0);
39795fce210SBarry Smith }
39895fce210SBarry Smith 
3998dbb0df6SBarry Smith /*@C
40095fce210SBarry Smith    PetscSFSetGraph - Set a parallel star forest
40195fce210SBarry Smith 
40295fce210SBarry Smith    Collective
40395fce210SBarry Smith 
4044165533cSJose E. Roman    Input Parameters:
40595fce210SBarry Smith +  sf - star forest
40695fce210SBarry Smith .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
40795fce210SBarry Smith .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
408c4e6a40aSLawrence Mitchell .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
409c4e6a40aSLawrence Mitchell during setup in debug mode)
41095fce210SBarry Smith .  localmode - copy mode for ilocal
411c4e6a40aSLawrence Mitchell .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
412c4e6a40aSLawrence Mitchell during setup in debug mode)
41395fce210SBarry Smith -  remotemode - copy mode for iremote
41495fce210SBarry Smith 
41595fce210SBarry Smith    Level: intermediate
41695fce210SBarry Smith 
41795452b02SPatrick Sanan    Notes:
418db2b9530SVaclav Hapla    Leaf indices in ilocal must be unique, otherwise an error occurs.
41938ab3f8aSBarry Smith 
420db2b9530SVaclav Hapla    Input arrays ilocal and iremote follow the PetscCopyMode semantics.
421cab54364SBarry Smith    In particular, if localmode/remotemode is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
422db2b9530SVaclav Hapla    PETSc might modify the respective array;
423cab54364SBarry Smith    if `PETSC_USE_POINTER`, the user must delete the array after PetscSFDestroy().
424cab54364SBarry Smith    Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
425db2b9530SVaclav Hapla 
426cab54364SBarry Smith    Fortran Note:
427db2b9530SVaclav Hapla    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
428c4e6a40aSLawrence Mitchell 
429cab54364SBarry Smith    Developer Note:
430db2b9530SVaclav Hapla    We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
431db2b9530SVaclav Hapla    This also allows to compare leaf sets of two SFs easily.
43272bf8598SVaclav Hapla 
433cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
43495fce210SBarry Smith @*/
435d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
436d71ae5a4SJacob Faibussowitsch {
437db2b9530SVaclav Hapla   PetscBool unique, contiguous;
43895fce210SBarry Smith 
43995fce210SBarry Smith   PetscFunctionBegin;
44095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
44129046d53SLisandro Dalcin   if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4);
44229046d53SLisandro Dalcin   if (nleaves > 0) PetscValidPointer(iremote, 6);
44308401ef6SPierre Jolivet   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
44408401ef6SPierre Jolivet   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
4458da24d32SBarry Smith   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
4468da24d32SBarry Smith    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
4478da24d32SBarry Smith   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
4488da24d32SBarry Smith   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
44929046d53SLisandro Dalcin 
4502a67d2daSStefano Zampini   if (sf->nroots >= 0) { /* Reset only if graph already set */
4519566063dSJacob Faibussowitsch     PetscCall(PetscSFReset(sf));
4522a67d2daSStefano Zampini   }
4532a67d2daSStefano Zampini 
4549566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
45529046d53SLisandro Dalcin 
45695fce210SBarry Smith   sf->nroots  = nroots;
45795fce210SBarry Smith   sf->nleaves = nleaves;
45829046d53SLisandro Dalcin 
459db2b9530SVaclav Hapla   if (localmode == PETSC_COPY_VALUES && ilocal) {
460db2b9530SVaclav Hapla     PetscInt *tlocal = NULL;
461db2b9530SVaclav Hapla 
4629566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tlocal));
4639566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
464db2b9530SVaclav Hapla     ilocal = tlocal;
465db2b9530SVaclav Hapla   }
466db2b9530SVaclav Hapla   if (remotemode == PETSC_COPY_VALUES) {
467db2b9530SVaclav Hapla     PetscSFNode *tremote = NULL;
468db2b9530SVaclav Hapla 
4699566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tremote));
4709566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
471db2b9530SVaclav Hapla     iremote = tremote;
472db2b9530SVaclav Hapla   }
473db2b9530SVaclav Hapla 
47429046d53SLisandro Dalcin   if (nleaves && ilocal) {
475db2b9530SVaclav Hapla     PetscSFNode work;
476db2b9530SVaclav Hapla 
4779566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
4789566063dSJacob Faibussowitsch     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
479db2b9530SVaclav Hapla     unique = PetscNot(unique);
480db2b9530SVaclav Hapla     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
481db2b9530SVaclav Hapla     sf->minleaf = ilocal[0];
482db2b9530SVaclav Hapla     sf->maxleaf = ilocal[nleaves - 1];
483db2b9530SVaclav Hapla     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
48429046d53SLisandro Dalcin   } else {
48529046d53SLisandro Dalcin     sf->minleaf = 0;
48629046d53SLisandro Dalcin     sf->maxleaf = nleaves - 1;
487db2b9530SVaclav Hapla     unique      = PETSC_TRUE;
488db2b9530SVaclav Hapla     contiguous  = PETSC_TRUE;
48929046d53SLisandro Dalcin   }
49029046d53SLisandro Dalcin 
491db2b9530SVaclav Hapla   if (contiguous) {
492db2b9530SVaclav Hapla     if (localmode == PETSC_USE_POINTER) {
493db2b9530SVaclav Hapla       ilocal = NULL;
494db2b9530SVaclav Hapla     } else {
4959566063dSJacob Faibussowitsch       PetscCall(PetscFree(ilocal));
496db2b9530SVaclav Hapla     }
497db2b9530SVaclav Hapla   }
498db2b9530SVaclav Hapla   sf->mine = ilocal;
499db2b9530SVaclav Hapla   if (localmode == PETSC_USE_POINTER) {
50029046d53SLisandro Dalcin     sf->mine_alloc = NULL;
501db2b9530SVaclav Hapla   } else {
502db2b9530SVaclav Hapla     sf->mine_alloc = ilocal;
50395fce210SBarry Smith   }
504db2b9530SVaclav Hapla   sf->remote = iremote;
505db2b9530SVaclav Hapla   if (remotemode == PETSC_USE_POINTER) {
50629046d53SLisandro Dalcin     sf->remote_alloc = NULL;
507db2b9530SVaclav Hapla   } else {
508db2b9530SVaclav Hapla     sf->remote_alloc = iremote;
50995fce210SBarry Smith   }
5109566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
51129046d53SLisandro Dalcin   sf->graphset = PETSC_TRUE;
51295fce210SBarry Smith   PetscFunctionReturn(0);
51395fce210SBarry Smith }
51495fce210SBarry Smith 
51529046d53SLisandro Dalcin /*@
516cab54364SBarry Smith   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
517dd5b3ca6SJunchao Zhang 
518dd5b3ca6SJunchao Zhang   Collective
519dd5b3ca6SJunchao Zhang 
520dd5b3ca6SJunchao Zhang   Input Parameters:
521cab54364SBarry Smith + sf      - The `PetscSF`
522cab54364SBarry Smith . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
523cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
524cab54364SBarry Smith 
525cab54364SBarry Smith   Level: intermediate
526dd5b3ca6SJunchao Zhang 
527dd5b3ca6SJunchao Zhang   Notes:
528cab54364SBarry Smith   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector x and its layout is map.
529dd5b3ca6SJunchao Zhang   n and N are local and global sizes of x respectively.
530dd5b3ca6SJunchao Zhang 
531cab54364SBarry Smith   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does Bcast on it, it will copy x to
532dd5b3ca6SJunchao Zhang   sequential vectors y on all ranks.
533dd5b3ca6SJunchao Zhang 
534cab54364SBarry Smith   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does Bcast on it, it will copy x to a
535dd5b3ca6SJunchao Zhang   sequential vector y on rank 0.
536dd5b3ca6SJunchao Zhang 
537dd5b3ca6SJunchao Zhang   In above cases, entries of x are roots and entries of y are leaves.
538dd5b3ca6SJunchao Zhang 
539cab54364SBarry Smith   With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of sf's communicator. The routine
540dd5b3ca6SJunchao Zhang   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
541cab54364SBarry Smith   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
542dd5b3ca6SJunchao Zhang   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
543cab54364SBarry Smith   items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.
544dd5b3ca6SJunchao Zhang 
545dd5b3ca6SJunchao Zhang   In this case, roots and leaves are symmetric.
546dd5b3ca6SJunchao Zhang 
547cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
548dd5b3ca6SJunchao Zhang  @*/
549d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
550d71ae5a4SJacob Faibussowitsch {
551dd5b3ca6SJunchao Zhang   MPI_Comm    comm;
552dd5b3ca6SJunchao Zhang   PetscInt    n, N, res[2];
553dd5b3ca6SJunchao Zhang   PetscMPIInt rank, size;
554dd5b3ca6SJunchao Zhang   PetscSFType type;
555dd5b3ca6SJunchao Zhang 
556dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
5572abc8c78SJacob Faibussowitsch   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
5582abc8c78SJacob Faibussowitsch   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2);
5599566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
5602c71b3e2SJacob Faibussowitsch   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
5619566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
563dd5b3ca6SJunchao Zhang 
564dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
565dd5b3ca6SJunchao Zhang     type = PETSCSFALLTOALL;
5669566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreate(comm, &sf->map));
5679566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
5689566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
5699566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(sf->map));
570dd5b3ca6SJunchao Zhang   } else {
5719566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetLocalSize(map, &n));
5729566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetSize(map, &N));
573dd5b3ca6SJunchao Zhang     res[0] = n;
574dd5b3ca6SJunchao Zhang     res[1] = -n;
575dd5b3ca6SJunchao Zhang     /* Check if n are same over all ranks so that we can optimize it */
5761c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
577dd5b3ca6SJunchao Zhang     if (res[0] == -res[1]) { /* same n */
578dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
579dd5b3ca6SJunchao Zhang     } else {
580dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
581dd5b3ca6SJunchao Zhang     }
5829566063dSJacob Faibussowitsch     PetscCall(PetscLayoutReference(map, &sf->map));
583dd5b3ca6SJunchao Zhang   }
5849566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, type));
585dd5b3ca6SJunchao Zhang 
586dd5b3ca6SJunchao Zhang   sf->pattern = pattern;
587dd5b3ca6SJunchao Zhang   sf->mine    = NULL; /* Contiguous */
588dd5b3ca6SJunchao Zhang 
589dd5b3ca6SJunchao Zhang   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
590dd5b3ca6SJunchao Zhang      Also set other easy stuff.
591dd5b3ca6SJunchao Zhang    */
592dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
593dd5b3ca6SJunchao Zhang     sf->nleaves = N;
594dd5b3ca6SJunchao Zhang     sf->nroots  = n;
595dd5b3ca6SJunchao Zhang     sf->nranks  = size;
596dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
597dd5b3ca6SJunchao Zhang     sf->maxleaf = N - 1;
598dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_GATHER) {
599dd5b3ca6SJunchao Zhang     sf->nleaves = rank ? 0 : N;
600dd5b3ca6SJunchao Zhang     sf->nroots  = n;
601dd5b3ca6SJunchao Zhang     sf->nranks  = rank ? 0 : size;
602dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
603dd5b3ca6SJunchao Zhang     sf->maxleaf = rank ? -1 : N - 1;
604dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
605dd5b3ca6SJunchao Zhang     sf->nleaves = size;
606dd5b3ca6SJunchao Zhang     sf->nroots  = size;
607dd5b3ca6SJunchao Zhang     sf->nranks  = size;
608dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
609dd5b3ca6SJunchao Zhang     sf->maxleaf = size - 1;
610dd5b3ca6SJunchao Zhang   }
611dd5b3ca6SJunchao Zhang   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
612dd5b3ca6SJunchao Zhang   sf->graphset = PETSC_TRUE;
613dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
614dd5b3ca6SJunchao Zhang }
615dd5b3ca6SJunchao Zhang 
616dd5b3ca6SJunchao Zhang /*@
617cab54364SBarry Smith    PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
61895fce210SBarry Smith 
61995fce210SBarry Smith    Collective
62095fce210SBarry Smith 
6214165533cSJose E. Roman    Input Parameter:
62295fce210SBarry Smith .  sf - star forest to invert
62395fce210SBarry Smith 
6244165533cSJose E. Roman    Output Parameter:
62595fce210SBarry Smith .  isf - inverse of sf
6264165533cSJose E. Roman 
62795fce210SBarry Smith    Level: advanced
62895fce210SBarry Smith 
62995fce210SBarry Smith    Notes:
63095fce210SBarry Smith    All roots must have degree 1.
63195fce210SBarry Smith 
63295fce210SBarry Smith    The local space may be a permutation, but cannot be sparse.
63395fce210SBarry Smith 
634cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFSetGraph()`
63595fce210SBarry Smith @*/
636d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
637d71ae5a4SJacob Faibussowitsch {
63895fce210SBarry Smith   PetscMPIInt     rank;
63995fce210SBarry Smith   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
64095fce210SBarry Smith   const PetscInt *ilocal;
64195fce210SBarry Smith   PetscSFNode    *roots, *leaves;
64295fce210SBarry Smith 
64395fce210SBarry Smith   PetscFunctionBegin;
64429046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
64529046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
64629046d53SLisandro Dalcin   PetscValidPointer(isf, 2);
64729046d53SLisandro Dalcin 
6489566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
64929046d53SLisandro Dalcin   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
65029046d53SLisandro Dalcin 
6519566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
6529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
653ae9aee6dSMatthew G. Knepley   for (i = 0; i < maxlocal; i++) {
65495fce210SBarry Smith     leaves[i].rank  = rank;
65595fce210SBarry Smith     leaves[i].index = i;
65695fce210SBarry Smith   }
65795fce210SBarry Smith   for (i = 0; i < nroots; i++) {
65895fce210SBarry Smith     roots[i].rank  = -1;
65995fce210SBarry Smith     roots[i].index = -1;
66095fce210SBarry Smith   }
6619566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
6629566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
66395fce210SBarry Smith 
66495fce210SBarry Smith   /* Check whether our leaves are sparse */
6659371c9d4SSatish Balay   for (i = 0, count = 0; i < nroots; i++)
6669371c9d4SSatish Balay     if (roots[i].rank >= 0) count++;
66795fce210SBarry Smith   if (count == nroots) newilocal = NULL;
6689371c9d4SSatish Balay   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
66995fce210SBarry Smith     for (i = 0, count = 0; i < nroots; i++) {
67095fce210SBarry Smith       if (roots[i].rank >= 0) {
67195fce210SBarry Smith         newilocal[count]   = i;
67295fce210SBarry Smith         roots[count].rank  = roots[i].rank;
67395fce210SBarry Smith         roots[count].index = roots[i].index;
67495fce210SBarry Smith         count++;
67595fce210SBarry Smith       }
67695fce210SBarry Smith     }
67795fce210SBarry Smith   }
67895fce210SBarry Smith 
6799566063dSJacob Faibussowitsch   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
6809566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
6819566063dSJacob Faibussowitsch   PetscCall(PetscFree2(roots, leaves));
68295fce210SBarry Smith   PetscFunctionReturn(0);
68395fce210SBarry Smith }
68495fce210SBarry Smith 
68595fce210SBarry Smith /*@
686cab54364SBarry Smith    PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
68795fce210SBarry Smith 
68895fce210SBarry Smith    Collective
68995fce210SBarry Smith 
6904165533cSJose E. Roman    Input Parameters:
69195fce210SBarry Smith +  sf - communication object to duplicate
692cab54364SBarry Smith -  opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
69395fce210SBarry Smith 
6944165533cSJose E. Roman    Output Parameter:
69595fce210SBarry Smith .  newsf - new communication object
69695fce210SBarry Smith 
69795fce210SBarry Smith    Level: beginner
69895fce210SBarry Smith 
699cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
70095fce210SBarry Smith @*/
701d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
702d71ae5a4SJacob Faibussowitsch {
70329046d53SLisandro Dalcin   PetscSFType  type;
70497929ea7SJunchao Zhang   MPI_Datatype dtype = MPIU_SCALAR;
70595fce210SBarry Smith 
70695fce210SBarry Smith   PetscFunctionBegin;
70729046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
70829046d53SLisandro Dalcin   PetscValidLogicalCollectiveEnum(sf, opt, 2);
70929046d53SLisandro Dalcin   PetscValidPointer(newsf, 3);
7109566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
7119566063dSJacob Faibussowitsch   PetscCall(PetscSFGetType(sf, &type));
7129566063dSJacob Faibussowitsch   if (type) PetscCall(PetscSFSetType(*newsf, type));
71354a24607SJunchao Zhang   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag ealier since PetscSFSetGraph() below checks on this flag */
71495fce210SBarry Smith   if (opt == PETSCSF_DUPLICATE_GRAPH) {
715dd5b3ca6SJunchao Zhang     PetscSFCheckGraphSet(sf, 1);
716dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
71795fce210SBarry Smith       PetscInt           nroots, nleaves;
71895fce210SBarry Smith       const PetscInt    *ilocal;
71995fce210SBarry Smith       const PetscSFNode *iremote;
7209566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
7219566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
722dd5b3ca6SJunchao Zhang     } else {
7239566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
724dd5b3ca6SJunchao Zhang     }
72595fce210SBarry Smith   }
72697929ea7SJunchao Zhang   /* Since oldtype is committed, so is newtype, according to MPI */
7279566063dSJacob Faibussowitsch   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
72897929ea7SJunchao Zhang   (*newsf)->vscat.bs     = sf->vscat.bs;
72997929ea7SJunchao Zhang   (*newsf)->vscat.unit   = dtype;
73097929ea7SJunchao Zhang   (*newsf)->vscat.to_n   = sf->vscat.to_n;
73197929ea7SJunchao Zhang   (*newsf)->vscat.from_n = sf->vscat.from_n;
73297929ea7SJunchao Zhang   /* Do not copy lsf. Build it on demand since it is rarely used */
73397929ea7SJunchao Zhang 
73420c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
73520c24465SJunchao Zhang   (*newsf)->backend              = sf->backend;
73671438e86SJunchao Zhang   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
73720c24465SJunchao Zhang   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
73820c24465SJunchao Zhang   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
73920c24465SJunchao Zhang #endif
740dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
74120c24465SJunchao Zhang   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
74295fce210SBarry Smith   PetscFunctionReturn(0);
74395fce210SBarry Smith }
74495fce210SBarry Smith 
74595fce210SBarry Smith /*@C
74695fce210SBarry Smith    PetscSFGetGraph - Get the graph specifying a parallel star forest
74795fce210SBarry Smith 
74895fce210SBarry Smith    Not Collective
74995fce210SBarry Smith 
7504165533cSJose E. Roman    Input Parameter:
75195fce210SBarry Smith .  sf - star forest
75295fce210SBarry Smith 
7534165533cSJose E. Roman    Output Parameters:
75495fce210SBarry Smith +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
75595fce210SBarry Smith .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
756bc6585dcSJunchao Zhang .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
75795fce210SBarry Smith -  iremote - remote locations of root vertices for each leaf on the current process
75895fce210SBarry Smith 
759cab54364SBarry Smith    Level: intermediate
760cab54364SBarry Smith 
761373e0d91SLisandro Dalcin    Notes:
762373e0d91SLisandro Dalcin      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
763373e0d91SLisandro Dalcin 
764cab54364SBarry Smith      The returned ilocal/iremote might contain values in different order than the input ones in `PetscSFSetGraph()`,
765db2b9530SVaclav Hapla      see its manpage for details.
766db2b9530SVaclav Hapla 
7678dbb0df6SBarry Smith    Fortran Notes:
7688dbb0df6SBarry Smith      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
769cab54364SBarry Smith      want to update the graph, you must call `PetscSFSetGraph()` after modifying the iremote array.
7708dbb0df6SBarry Smith 
7718dbb0df6SBarry Smith      To check for a NULL ilocal use
7728dbb0df6SBarry Smith $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
773ca797d7aSLawrence Mitchell 
774cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
77595fce210SBarry Smith @*/
776d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
777d71ae5a4SJacob Faibussowitsch {
77895fce210SBarry Smith   PetscFunctionBegin;
77995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
780b8dee149SJunchao Zhang   if (sf->ops->GetGraph) {
7819566063dSJacob Faibussowitsch     PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
782b8dee149SJunchao Zhang   } else {
78395fce210SBarry Smith     if (nroots) *nroots = sf->nroots;
78495fce210SBarry Smith     if (nleaves) *nleaves = sf->nleaves;
78595fce210SBarry Smith     if (ilocal) *ilocal = sf->mine;
78695fce210SBarry Smith     if (iremote) *iremote = sf->remote;
787b8dee149SJunchao Zhang   }
78895fce210SBarry Smith   PetscFunctionReturn(0);
78995fce210SBarry Smith }
79095fce210SBarry Smith 
79129046d53SLisandro Dalcin /*@
79295fce210SBarry Smith    PetscSFGetLeafRange - Get the active leaf ranges
79395fce210SBarry Smith 
79495fce210SBarry Smith    Not Collective
79595fce210SBarry Smith 
7964165533cSJose E. Roman    Input Parameter:
79795fce210SBarry Smith .  sf - star forest
79895fce210SBarry Smith 
7994165533cSJose E. Roman    Output Parameters:
800dd5b3ca6SJunchao Zhang +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
801dd5b3ca6SJunchao Zhang -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
80295fce210SBarry Smith 
80395fce210SBarry Smith    Level: developer
80495fce210SBarry Smith 
805cab54364SBarry Smith .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
80695fce210SBarry Smith @*/
807d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
808d71ae5a4SJacob Faibussowitsch {
80995fce210SBarry Smith   PetscFunctionBegin;
81095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
81129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
81295fce210SBarry Smith   if (minleaf) *minleaf = sf->minleaf;
81395fce210SBarry Smith   if (maxleaf) *maxleaf = sf->maxleaf;
81495fce210SBarry Smith   PetscFunctionReturn(0);
81595fce210SBarry Smith }
81695fce210SBarry Smith 
81795fce210SBarry Smith /*@C
818cab54364SBarry Smith    PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
819fe2efc57SMark 
820cab54364SBarry Smith    Collective on A
821fe2efc57SMark 
822fe2efc57SMark    Input Parameters:
823fe2efc57SMark +  A - the star forest
824cab54364SBarry Smith .  obj - Optional object that provides the prefix for the option names
825736c3998SJose E. Roman -  name - command line option
826fe2efc57SMark 
827fe2efc57SMark    Level: intermediate
828cab54364SBarry Smith 
829db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
830fe2efc57SMark @*/
831d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
832d71ae5a4SJacob Faibussowitsch {
833fe2efc57SMark   PetscFunctionBegin;
834fe2efc57SMark   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
8359566063dSJacob Faibussowitsch   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
836fe2efc57SMark   PetscFunctionReturn(0);
837fe2efc57SMark }
838fe2efc57SMark 
839fe2efc57SMark /*@C
84095fce210SBarry Smith    PetscSFView - view a star forest
84195fce210SBarry Smith 
84295fce210SBarry Smith    Collective
84395fce210SBarry Smith 
8444165533cSJose E. Roman    Input Parameters:
84595fce210SBarry Smith +  sf - star forest
846cab54364SBarry Smith -  viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
84795fce210SBarry Smith 
84895fce210SBarry Smith    Level: beginner
84995fce210SBarry Smith 
850cab54364SBarry Smith .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
85195fce210SBarry Smith @*/
852d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
853d71ae5a4SJacob Faibussowitsch {
85495fce210SBarry Smith   PetscBool         iascii;
85595fce210SBarry Smith   PetscViewerFormat format;
85695fce210SBarry Smith 
85795fce210SBarry Smith   PetscFunctionBegin;
85895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
8599566063dSJacob Faibussowitsch   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
86095fce210SBarry Smith   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
86195fce210SBarry Smith   PetscCheckSameComm(sf, 1, viewer, 2);
8629566063dSJacob Faibussowitsch   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
8639566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
86453dd6d7dSJunchao Zhang   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
86595fce210SBarry Smith     PetscMPIInt rank;
86681bfa7aaSJed Brown     PetscInt    ii, i, j;
86795fce210SBarry Smith 
8689566063dSJacob Faibussowitsch     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
8699566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPushTab(viewer));
870dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
87180153354SVaclav Hapla       if (!sf->graphset) {
8729566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
8739566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopTab(viewer));
87480153354SVaclav Hapla         PetscFunctionReturn(0);
87580153354SVaclav Hapla       }
8769566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
8779566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
8789566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
87948a46eb9SPierre Jolivet       for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
8809566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
8819566063dSJacob Faibussowitsch       PetscCall(PetscViewerGetFormat(viewer, &format));
88295fce210SBarry Smith       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
88381bfa7aaSJed Brown         PetscMPIInt *tmpranks, *perm;
8849566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
8859566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
88681bfa7aaSJed Brown         for (i = 0; i < sf->nranks; i++) perm[i] = i;
8879566063dSJacob Faibussowitsch         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
8889566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
88981bfa7aaSJed Brown         for (ii = 0; ii < sf->nranks; ii++) {
89081bfa7aaSJed Brown           i = perm[ii];
8919566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
89248a46eb9SPierre Jolivet           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
89395fce210SBarry Smith         }
8949566063dSJacob Faibussowitsch         PetscCall(PetscFree2(tmpranks, perm));
89595fce210SBarry Smith       }
8969566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
8979566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
898dd5b3ca6SJunchao Zhang     }
8999566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPopTab(viewer));
90095fce210SBarry Smith   }
901dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, View, viewer);
90295fce210SBarry Smith   PetscFunctionReturn(0);
90395fce210SBarry Smith }
90495fce210SBarry Smith 
90595fce210SBarry Smith /*@C
906dec1416fSJunchao Zhang    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
90795fce210SBarry Smith 
90895fce210SBarry Smith    Not Collective
90995fce210SBarry Smith 
9104165533cSJose E. Roman    Input Parameter:
91195fce210SBarry Smith .  sf - star forest
91295fce210SBarry Smith 
9134165533cSJose E. Roman    Output Parameters:
91495fce210SBarry Smith +  nranks - number of ranks referenced by local part
91595fce210SBarry Smith .  ranks - array of ranks
91695fce210SBarry Smith .  roffset - offset in rmine/rremote for each rank (length nranks+1)
91795fce210SBarry Smith .  rmine - concatenated array holding local indices referencing each remote rank
91895fce210SBarry Smith -  rremote - concatenated array holding remote indices referenced for each remote rank
91995fce210SBarry Smith 
92095fce210SBarry Smith    Level: developer
92195fce210SBarry Smith 
922cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
92395fce210SBarry Smith @*/
924d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
925d71ae5a4SJacob Faibussowitsch {
92695fce210SBarry Smith   PetscFunctionBegin;
92795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
92828b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
929dec1416fSJunchao Zhang   if (sf->ops->GetRootRanks) {
9309566063dSJacob Faibussowitsch     PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
931dec1416fSJunchao Zhang   } else {
932dec1416fSJunchao Zhang     /* The generic implementation */
93395fce210SBarry Smith     if (nranks) *nranks = sf->nranks;
93495fce210SBarry Smith     if (ranks) *ranks = sf->ranks;
93595fce210SBarry Smith     if (roffset) *roffset = sf->roffset;
93695fce210SBarry Smith     if (rmine) *rmine = sf->rmine;
93795fce210SBarry Smith     if (rremote) *rremote = sf->rremote;
938dec1416fSJunchao Zhang   }
93995fce210SBarry Smith   PetscFunctionReturn(0);
94095fce210SBarry Smith }
94195fce210SBarry Smith 
9428750ddebSJunchao Zhang /*@C
9438750ddebSJunchao Zhang    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
9448750ddebSJunchao Zhang 
9458750ddebSJunchao Zhang    Not Collective
9468750ddebSJunchao Zhang 
9474165533cSJose E. Roman    Input Parameter:
9488750ddebSJunchao Zhang .  sf - star forest
9498750ddebSJunchao Zhang 
9504165533cSJose E. Roman    Output Parameters:
9518750ddebSJunchao Zhang +  niranks - number of leaf ranks referencing roots on this process
9528750ddebSJunchao Zhang .  iranks - array of ranks
9538750ddebSJunchao Zhang .  ioffset - offset in irootloc for each rank (length niranks+1)
9548750ddebSJunchao Zhang -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
9558750ddebSJunchao Zhang 
9568750ddebSJunchao Zhang    Level: developer
9578750ddebSJunchao Zhang 
958cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
9598750ddebSJunchao Zhang @*/
960d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
961d71ae5a4SJacob Faibussowitsch {
9628750ddebSJunchao Zhang   PetscFunctionBegin;
9638750ddebSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
96428b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
9658750ddebSJunchao Zhang   if (sf->ops->GetLeafRanks) {
9669566063dSJacob Faibussowitsch     PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
9678750ddebSJunchao Zhang   } else {
9688750ddebSJunchao Zhang     PetscSFType type;
9699566063dSJacob Faibussowitsch     PetscCall(PetscSFGetType(sf, &type));
97098921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
9718750ddebSJunchao Zhang   }
9728750ddebSJunchao Zhang   PetscFunctionReturn(0);
9738750ddebSJunchao Zhang }
9748750ddebSJunchao Zhang 
975d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
976d71ae5a4SJacob Faibussowitsch {
977b5a8e515SJed Brown   PetscInt i;
978b5a8e515SJed Brown   for (i = 0; i < n; i++) {
979b5a8e515SJed Brown     if (needle == list[i]) return PETSC_TRUE;
980b5a8e515SJed Brown   }
981b5a8e515SJed Brown   return PETSC_FALSE;
982b5a8e515SJed Brown }
983b5a8e515SJed Brown 
98495fce210SBarry Smith /*@C
985cab54364SBarry Smith    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
98621c688dcSJed Brown 
98721c688dcSJed Brown    Collective
98821c688dcSJed Brown 
9894165533cSJose E. Roman    Input Parameters:
990cab54364SBarry Smith +  sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
991cab54364SBarry Smith -  dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
99221c688dcSJed Brown 
99321c688dcSJed Brown    Level: developer
99421c688dcSJed Brown 
995cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
99621c688dcSJed Brown @*/
997d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
998d71ae5a4SJacob Faibussowitsch {
99921c688dcSJed Brown   PetscTable         table;
100021c688dcSJed Brown   PetscTablePosition pos;
1001b5a8e515SJed Brown   PetscMPIInt        size, groupsize, *groupranks;
1002247e8311SStefano Zampini   PetscInt          *rcount, *ranks;
1003247e8311SStefano Zampini   PetscInt           i, irank = -1, orank = -1;
100421c688dcSJed Brown 
100521c688dcSJed Brown   PetscFunctionBegin;
100621c688dcSJed Brown   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
100729046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
10089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
10099566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(10, size, &table));
101021c688dcSJed Brown   for (i = 0; i < sf->nleaves; i++) {
101121c688dcSJed Brown     /* Log 1-based rank */
10129566063dSJacob Faibussowitsch     PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
101321c688dcSJed Brown   }
10149566063dSJacob Faibussowitsch   PetscCall(PetscTableGetCount(table, &sf->nranks));
10159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
10169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
10179566063dSJacob Faibussowitsch   PetscCall(PetscTableGetHeadPosition(table, &pos));
101821c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
10199566063dSJacob Faibussowitsch     PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]));
102021c688dcSJed Brown     ranks[i]--; /* Convert back to 0-based */
102121c688dcSJed Brown   }
10229566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&table));
1023b5a8e515SJed Brown 
1024b5a8e515SJed Brown   /* We expect that dgroup is reliably "small" while nranks could be large */
1025b5a8e515SJed Brown   {
10267fb8a5e4SKarl Rupp     MPI_Group    group = MPI_GROUP_NULL;
1027b5a8e515SJed Brown     PetscMPIInt *dgroupranks;
10289566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
10299566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
10309566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
10319566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &groupranks));
1032b5a8e515SJed Brown     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
10339566063dSJacob Faibussowitsch     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
10349566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
10359566063dSJacob Faibussowitsch     PetscCall(PetscFree(dgroupranks));
1036b5a8e515SJed Brown   }
1037b5a8e515SJed Brown 
1038b5a8e515SJed Brown   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1039b5a8e515SJed Brown   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1040b5a8e515SJed Brown     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1041b5a8e515SJed Brown       if (InList(ranks[i], groupsize, groupranks)) break;
1042b5a8e515SJed Brown     }
1043b5a8e515SJed Brown     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1044b5a8e515SJed Brown       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1045b5a8e515SJed Brown     }
1046b5a8e515SJed Brown     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1047b5a8e515SJed Brown       PetscInt tmprank, tmpcount;
1048247e8311SStefano Zampini 
1049b5a8e515SJed Brown       tmprank             = ranks[i];
1050b5a8e515SJed Brown       tmpcount            = rcount[i];
1051b5a8e515SJed Brown       ranks[i]            = ranks[sf->ndranks];
1052b5a8e515SJed Brown       rcount[i]           = rcount[sf->ndranks];
1053b5a8e515SJed Brown       ranks[sf->ndranks]  = tmprank;
1054b5a8e515SJed Brown       rcount[sf->ndranks] = tmpcount;
1055b5a8e515SJed Brown       sf->ndranks++;
1056b5a8e515SJed Brown     }
1057b5a8e515SJed Brown   }
10589566063dSJacob Faibussowitsch   PetscCall(PetscFree(groupranks));
10599566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
10609566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
106121c688dcSJed Brown   sf->roffset[0] = 0;
106221c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
10639566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
106421c688dcSJed Brown     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
106521c688dcSJed Brown     rcount[i]          = 0;
106621c688dcSJed Brown   }
1067247e8311SStefano Zampini   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1068247e8311SStefano Zampini     /* short circuit */
1069247e8311SStefano Zampini     if (orank != sf->remote[i].rank) {
107021c688dcSJed Brown       /* Search for index of iremote[i].rank in sf->ranks */
10719566063dSJacob Faibussowitsch       PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1072b5a8e515SJed Brown       if (irank < 0) {
10739566063dSJacob Faibussowitsch         PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1074b5a8e515SJed Brown         if (irank >= 0) irank += sf->ndranks;
107521c688dcSJed Brown       }
1076247e8311SStefano Zampini       orank = sf->remote[i].rank;
1077247e8311SStefano Zampini     }
107808401ef6SPierre Jolivet     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
107921c688dcSJed Brown     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
108021c688dcSJed Brown     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
108121c688dcSJed Brown     rcount[irank]++;
108221c688dcSJed Brown   }
10839566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rcount, ranks));
108421c688dcSJed Brown   PetscFunctionReturn(0);
108521c688dcSJed Brown }
108621c688dcSJed Brown 
108721c688dcSJed Brown /*@C
108895fce210SBarry Smith    PetscSFGetGroups - gets incoming and outgoing process groups
108995fce210SBarry Smith 
109095fce210SBarry Smith    Collective
109195fce210SBarry Smith 
10924165533cSJose E. Roman    Input Parameter:
109395fce210SBarry Smith .  sf - star forest
109495fce210SBarry Smith 
10954165533cSJose E. Roman    Output Parameters:
109695fce210SBarry Smith +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
109795fce210SBarry Smith -  outgoing - group of destination processes for outgoing edges (roots that I reference)
109895fce210SBarry Smith 
109995fce210SBarry Smith    Level: developer
110095fce210SBarry Smith 
1101cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
110295fce210SBarry Smith @*/
1103d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1104d71ae5a4SJacob Faibussowitsch {
11057fb8a5e4SKarl Rupp   MPI_Group group = MPI_GROUP_NULL;
110695fce210SBarry Smith 
110795fce210SBarry Smith   PetscFunctionBegin;
110808401ef6SPierre Jolivet   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
110995fce210SBarry Smith   if (sf->ingroup == MPI_GROUP_NULL) {
111095fce210SBarry Smith     PetscInt        i;
111195fce210SBarry Smith     const PetscInt *indegree;
111295fce210SBarry Smith     PetscMPIInt     rank, *outranks, *inranks;
111395fce210SBarry Smith     PetscSFNode    *remote;
111495fce210SBarry Smith     PetscSF         bgcount;
111595fce210SBarry Smith 
111695fce210SBarry Smith     /* Compute the number of incoming ranks */
11179566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nranks, &remote));
111895fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) {
111995fce210SBarry Smith       remote[i].rank  = sf->ranks[i];
112095fce210SBarry Smith       remote[i].index = 0;
112195fce210SBarry Smith     }
11229566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
11239566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
11249566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
11259566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
112695fce210SBarry Smith     /* Enumerate the incoming ranks */
11279566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
11289566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
112995fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
11309566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
11319566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
11329566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11339566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
11349566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
11359566063dSJacob Faibussowitsch     PetscCall(PetscFree2(inranks, outranks));
11369566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&bgcount));
113795fce210SBarry Smith   }
113895fce210SBarry Smith   *incoming = sf->ingroup;
113995fce210SBarry Smith 
114095fce210SBarry Smith   if (sf->outgroup == MPI_GROUP_NULL) {
11419566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11429566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
11439566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
114495fce210SBarry Smith   }
114595fce210SBarry Smith   *outgoing = sf->outgroup;
114695fce210SBarry Smith   PetscFunctionReturn(0);
114795fce210SBarry Smith }
114895fce210SBarry Smith 
114929046d53SLisandro Dalcin /*@
1150cab54364SBarry Smith    PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
115195fce210SBarry Smith 
115295fce210SBarry Smith    Collective
115395fce210SBarry Smith 
11544165533cSJose E. Roman    Input Parameter:
115595fce210SBarry Smith .  sf - star forest that may contain roots with 0 or with more than 1 vertex
115695fce210SBarry Smith 
11574165533cSJose E. Roman    Output Parameter:
115895fce210SBarry Smith .  multi - star forest with split roots, such that each root has degree exactly 1
115995fce210SBarry Smith 
116095fce210SBarry Smith    Level: developer
116195fce210SBarry Smith 
1162cab54364SBarry Smith    Note:
1163cab54364SBarry Smith    In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
116495fce210SBarry Smith    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
116595fce210SBarry Smith    edge, it is a candidate for future optimization that might involve its removal.
116695fce210SBarry Smith 
1167cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
116895fce210SBarry Smith @*/
1169d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1170d71ae5a4SJacob Faibussowitsch {
117195fce210SBarry Smith   PetscFunctionBegin;
117295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
117395fce210SBarry Smith   PetscValidPointer(multi, 2);
117495fce210SBarry Smith   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
11759566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
117695fce210SBarry Smith     *multi           = sf->multi;
1177013b3241SStefano Zampini     sf->multi->multi = sf->multi;
117895fce210SBarry Smith     PetscFunctionReturn(0);
117995fce210SBarry Smith   }
118095fce210SBarry Smith   if (!sf->multi) {
118195fce210SBarry Smith     const PetscInt *indegree;
11829837ea96SMatthew G. Knepley     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
118395fce210SBarry Smith     PetscSFNode    *remote;
118429046d53SLisandro Dalcin     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
11859566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
11869566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
11879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
118895fce210SBarry Smith     inoffset[0] = 0;
118995fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
11909837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) outones[i] = 1;
11919566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
11929566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
119395fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
119476bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1195ad540459SPierre Jolivet       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
119676bd3646SJed Brown     }
11979566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nleaves, &remote));
119895fce210SBarry Smith     for (i = 0; i < sf->nleaves; i++) {
119995fce210SBarry Smith       remote[i].rank  = sf->remote[i].rank;
120038e7336fSToby Isaac       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
120195fce210SBarry Smith     }
12029566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1203013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12049566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
120595fce210SBarry Smith     if (sf->rankorder) { /* Sort the ranks */
120695fce210SBarry Smith       PetscMPIInt  rank;
120795fce210SBarry Smith       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
120895fce210SBarry Smith       PetscSFNode *newremote;
12099566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
121095fce210SBarry Smith       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
12119566063dSJacob Faibussowitsch       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
12129837ea96SMatthew G. Knepley       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
12139566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
12149566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
121595fce210SBarry Smith       /* Sort the incoming ranks at each vertex, build the inverse map */
121695fce210SBarry Smith       for (i = 0; i < sf->nroots; i++) {
121795fce210SBarry Smith         PetscInt j;
121895fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
12199566063dSJacob Faibussowitsch         PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
122095fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
122195fce210SBarry Smith       }
12229566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
12239566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
12249566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
122595fce210SBarry Smith       for (i = 0; i < sf->nleaves; i++) {
122695fce210SBarry Smith         newremote[i].rank  = sf->remote[i].rank;
122701365b40SToby Isaac         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
122895fce210SBarry Smith       }
12299566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
12309566063dSJacob Faibussowitsch       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
123195fce210SBarry Smith     }
12329566063dSJacob Faibussowitsch     PetscCall(PetscFree3(inoffset, outones, outoffset));
123395fce210SBarry Smith   }
123495fce210SBarry Smith   *multi = sf->multi;
123595fce210SBarry Smith   PetscFunctionReturn(0);
123695fce210SBarry Smith }
123795fce210SBarry Smith 
123895fce210SBarry Smith /*@C
123972502a1fSJunchao Zhang    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
124095fce210SBarry Smith 
124195fce210SBarry Smith    Collective
124295fce210SBarry Smith 
12434165533cSJose E. Roman    Input Parameters:
124495fce210SBarry Smith +  sf - original star forest
1245ba2a7774SJunchao Zhang .  nselected  - number of selected roots on this process
1246ba2a7774SJunchao Zhang -  selected   - indices of the selected roots on this process
124795fce210SBarry Smith 
12484165533cSJose E. Roman    Output Parameter:
1249cd620004SJunchao Zhang .  esf - new star forest
125095fce210SBarry Smith 
125195fce210SBarry Smith    Level: advanced
125295fce210SBarry Smith 
125395fce210SBarry Smith    Note:
1254cab54364SBarry Smith    To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
125595fce210SBarry Smith    be done by calling PetscSFGetGraph().
125695fce210SBarry Smith 
1257cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
125895fce210SBarry Smith @*/
1259d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1260d71ae5a4SJacob Faibussowitsch {
1261cd620004SJunchao Zhang   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1262cd620004SJunchao Zhang   const PetscInt    *ilocal;
1263cd620004SJunchao Zhang   signed char       *rootdata, *leafdata, *leafmem;
1264ba2a7774SJunchao Zhang   const PetscSFNode *iremote;
1265f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1266f659e5c7SJunchao Zhang   MPI_Comm           comm;
126795fce210SBarry Smith 
126895fce210SBarry Smith   PetscFunctionBegin;
126995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
127029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
1271dadcf809SJacob Faibussowitsch   if (nselected) PetscValidIntPointer(selected, 3);
1272cd620004SJunchao Zhang   PetscValidPointer(esf, 4);
12730511a646SMatthew G. Knepley 
12749566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
12759566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
12769566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
12779566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1278cd620004SJunchao Zhang 
127976bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or  out of range indices */
1280cd620004SJunchao Zhang     PetscBool dups;
12819566063dSJacob Faibussowitsch     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
128228b400f6SJacob Faibussowitsch     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
12839371c9d4SSatish Balay     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1284cd620004SJunchao Zhang   }
1285f659e5c7SJunchao Zhang 
1286dbbe0bcdSBarry Smith   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1287dbbe0bcdSBarry Smith   else {
1288cd620004SJunchao Zhang     /* A generic version of creating embedded sf */
12899566063dSJacob Faibussowitsch     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1290cd620004SJunchao Zhang     maxlocal = maxleaf - minleaf + 1;
12919566063dSJacob Faibussowitsch     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1292cd620004SJunchao Zhang     leafdata = leafmem - minleaf;
1293cd620004SJunchao Zhang     /* Tag selected roots and bcast to leaves */
1294cd620004SJunchao Zhang     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
12959566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
12969566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1297ba2a7774SJunchao Zhang 
1298cd620004SJunchao Zhang     /* Build esf with leaves that are still connected */
1299cd620004SJunchao Zhang     esf_nleaves = 0;
1300cd620004SJunchao Zhang     for (i = 0; i < nleaves; i++) {
1301cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1302cd620004SJunchao Zhang       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1303cd620004SJunchao Zhang          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1304cd620004SJunchao Zhang       */
1305cd620004SJunchao Zhang       esf_nleaves += (leafdata[j] ? 1 : 0);
1306cd620004SJunchao Zhang     }
13079566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
13089566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1309cd620004SJunchao Zhang     for (i = n = 0; i < nleaves; i++) {
1310cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1311cd620004SJunchao Zhang       if (leafdata[j]) {
1312cd620004SJunchao Zhang         new_ilocal[n]        = j;
1313cd620004SJunchao Zhang         new_iremote[n].rank  = iremote[i].rank;
1314cd620004SJunchao Zhang         new_iremote[n].index = iremote[i].index;
1315fc1ede2bSMatthew G. Knepley         ++n;
131695fce210SBarry Smith       }
131795fce210SBarry Smith     }
13189566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, esf));
13199566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(*esf));
13209566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
13219566063dSJacob Faibussowitsch     PetscCall(PetscFree2(rootdata, leafmem));
1322f659e5c7SJunchao Zhang   }
13239566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
132495fce210SBarry Smith   PetscFunctionReturn(0);
132595fce210SBarry Smith }
132695fce210SBarry Smith 
13272f5fb4c2SMatthew G. Knepley /*@C
13282f5fb4c2SMatthew G. Knepley   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
13292f5fb4c2SMatthew G. Knepley 
13302f5fb4c2SMatthew G. Knepley   Collective
13312f5fb4c2SMatthew G. Knepley 
13324165533cSJose E. Roman   Input Parameters:
13332f5fb4c2SMatthew G. Knepley + sf - original star forest
1334f659e5c7SJunchao Zhang . nselected  - number of selected leaves on this process
1335f659e5c7SJunchao Zhang - selected   - indices of the selected leaves on this process
13362f5fb4c2SMatthew G. Knepley 
13374165533cSJose E. Roman   Output Parameter:
13382f5fb4c2SMatthew G. Knepley .  newsf - new star forest
13392f5fb4c2SMatthew G. Knepley 
13402f5fb4c2SMatthew G. Knepley   Level: advanced
13412f5fb4c2SMatthew G. Knepley 
1342cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
13432f5fb4c2SMatthew G. Knepley @*/
1344d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1345d71ae5a4SJacob Faibussowitsch {
1346f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
1347f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1348f659e5c7SJunchao Zhang   const PetscInt    *ilocal;
1349f659e5c7SJunchao Zhang   PetscInt           i, nroots, *leaves, *new_ilocal;
1350f659e5c7SJunchao Zhang   MPI_Comm           comm;
13512f5fb4c2SMatthew G. Knepley 
13522f5fb4c2SMatthew G. Knepley   PetscFunctionBegin;
13532f5fb4c2SMatthew G. Knepley   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
135429046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
1355dadcf809SJacob Faibussowitsch   if (nselected) PetscValidIntPointer(selected, 3);
13562f5fb4c2SMatthew G. Knepley   PetscValidPointer(newsf, 4);
13572f5fb4c2SMatthew G. Knepley 
1358f659e5c7SJunchao Zhang   /* Uniq selected[] and put results in leaves[] */
13599566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
13609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &leaves));
13619566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(leaves, selected, nselected));
13629566063dSJacob Faibussowitsch   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
136308401ef6SPierre Jolivet   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1364f659e5c7SJunchao Zhang 
1365f659e5c7SJunchao Zhang   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1366dbbe0bcdSBarry Smith   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1367dbbe0bcdSBarry Smith   else {
13689566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
13699566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_ilocal));
13709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_iremote));
1371f659e5c7SJunchao Zhang     for (i = 0; i < nselected; ++i) {
1372f659e5c7SJunchao Zhang       const PetscInt l     = leaves[i];
1373f659e5c7SJunchao Zhang       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1374f659e5c7SJunchao Zhang       new_iremote[i].rank  = iremote[l].rank;
1375f659e5c7SJunchao Zhang       new_iremote[i].index = iremote[l].index;
13762f5fb4c2SMatthew G. Knepley     }
13779566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
13789566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1379f659e5c7SJunchao Zhang   }
13809566063dSJacob Faibussowitsch   PetscCall(PetscFree(leaves));
13812f5fb4c2SMatthew G. Knepley   PetscFunctionReturn(0);
13822f5fb4c2SMatthew G. Knepley }
13832f5fb4c2SMatthew G. Knepley 
138495fce210SBarry Smith /*@C
1385cab54364SBarry Smith    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
13863482bfa8SJunchao Zhang 
1387cab54364SBarry Smith    Collective on sf
13883482bfa8SJunchao Zhang 
13894165533cSJose E. Roman    Input Parameters:
13903482bfa8SJunchao Zhang +  sf - star forest on which to communicate
13913482bfa8SJunchao Zhang .  unit - data type associated with each node
13923482bfa8SJunchao Zhang .  rootdata - buffer to broadcast
13933482bfa8SJunchao Zhang -  op - operation to use for reduction
13943482bfa8SJunchao Zhang 
13954165533cSJose E. Roman    Output Parameter:
13963482bfa8SJunchao Zhang .  leafdata - buffer to be reduced with values from each leaf's respective root
13973482bfa8SJunchao Zhang 
13983482bfa8SJunchao Zhang    Level: intermediate
13993482bfa8SJunchao Zhang 
1400d0295fc0SJunchao Zhang    Notes:
1401d0295fc0SJunchao Zhang     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1402d0295fc0SJunchao Zhang     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1403cab54364SBarry Smith     use `PetscSFBcastWithMemTypeBegin()` instead.
1404cab54364SBarry Smith 
1405cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
14063482bfa8SJunchao Zhang @*/
1407d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1408d71ae5a4SJacob Faibussowitsch {
1409eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
14103482bfa8SJunchao Zhang 
14113482bfa8SJunchao Zhang   PetscFunctionBegin;
14123482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14139566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
14149566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
14159566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
14169566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1417dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
14189566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
14193482bfa8SJunchao Zhang   PetscFunctionReturn(0);
14203482bfa8SJunchao Zhang }
14213482bfa8SJunchao Zhang 
14223482bfa8SJunchao Zhang /*@C
1423cab54364SBarry Smith    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to `PetscSFBcastEnd()`
1424d0295fc0SJunchao Zhang 
1425cab54364SBarry Smith    Collective on sf
1426d0295fc0SJunchao Zhang 
14274165533cSJose E. Roman    Input Parameters:
1428d0295fc0SJunchao Zhang +  sf - star forest on which to communicate
1429d0295fc0SJunchao Zhang .  unit - data type associated with each node
1430d0295fc0SJunchao Zhang .  rootmtype - memory type of rootdata
1431d0295fc0SJunchao Zhang .  rootdata - buffer to broadcast
1432d0295fc0SJunchao Zhang .  leafmtype - memory type of leafdata
1433d0295fc0SJunchao Zhang -  op - operation to use for reduction
1434d0295fc0SJunchao Zhang 
14354165533cSJose E. Roman    Output Parameter:
1436d0295fc0SJunchao Zhang .  leafdata - buffer to be reduced with values from each leaf's respective root
1437d0295fc0SJunchao Zhang 
1438d0295fc0SJunchao Zhang    Level: intermediate
1439d0295fc0SJunchao Zhang 
1440cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1441d0295fc0SJunchao Zhang @*/
1442d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1443d71ae5a4SJacob Faibussowitsch {
1444d0295fc0SJunchao Zhang   PetscFunctionBegin;
1445d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14469566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
14479566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1448dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
14499566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1450d0295fc0SJunchao Zhang   PetscFunctionReturn(0);
1451d0295fc0SJunchao Zhang }
1452d0295fc0SJunchao Zhang 
1453d0295fc0SJunchao Zhang /*@C
1454cab54364SBarry Smith    PetscSFBcastEnd - end a broadcast & reduce operation started with `PetscSFBcastBegin()`
14553482bfa8SJunchao Zhang 
14563482bfa8SJunchao Zhang    Collective
14573482bfa8SJunchao Zhang 
14584165533cSJose E. Roman    Input Parameters:
14593482bfa8SJunchao Zhang +  sf - star forest
14603482bfa8SJunchao Zhang .  unit - data type
14613482bfa8SJunchao Zhang .  rootdata - buffer to broadcast
14623482bfa8SJunchao Zhang -  op - operation to use for reduction
14633482bfa8SJunchao Zhang 
14644165533cSJose E. Roman    Output Parameter:
14653482bfa8SJunchao Zhang .  leafdata - buffer to be reduced with values from each leaf's respective root
14663482bfa8SJunchao Zhang 
14673482bfa8SJunchao Zhang    Level: intermediate
14683482bfa8SJunchao Zhang 
1469cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
14703482bfa8SJunchao Zhang @*/
1471d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1472d71ae5a4SJacob Faibussowitsch {
14733482bfa8SJunchao Zhang   PetscFunctionBegin;
14743482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14759566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1476dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
14779566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
14783482bfa8SJunchao Zhang   PetscFunctionReturn(0);
14793482bfa8SJunchao Zhang }
14803482bfa8SJunchao Zhang 
14813482bfa8SJunchao Zhang /*@C
1482cab54364SBarry Smith    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
148395fce210SBarry Smith 
148495fce210SBarry Smith    Collective
148595fce210SBarry Smith 
14864165533cSJose E. Roman    Input Parameters:
148795fce210SBarry Smith +  sf - star forest
148895fce210SBarry Smith .  unit - data type
148995fce210SBarry Smith .  leafdata - values to reduce
149095fce210SBarry Smith -  op - reduction operation
149195fce210SBarry Smith 
14924165533cSJose E. Roman    Output Parameter:
149395fce210SBarry Smith .  rootdata - result of reduction of values from all leaves of each root
149495fce210SBarry Smith 
149595fce210SBarry Smith    Level: intermediate
149695fce210SBarry Smith 
1497d0295fc0SJunchao Zhang    Notes:
1498d0295fc0SJunchao Zhang     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1499d0295fc0SJunchao Zhang     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1500cab54364SBarry Smith     use `PetscSFReduceWithMemTypeBegin()` instead.
1501d0295fc0SJunchao Zhang 
1502cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
150395fce210SBarry Smith @*/
1504d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1505d71ae5a4SJacob Faibussowitsch {
1506eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
150795fce210SBarry Smith 
150895fce210SBarry Smith   PetscFunctionBegin;
150995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15109566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15119566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15129566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
15139566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
15149566063dSJacob Faibussowitsch   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
15159566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
151695fce210SBarry Smith   PetscFunctionReturn(0);
151795fce210SBarry Smith }
151895fce210SBarry Smith 
151995fce210SBarry Smith /*@C
1520cab54364SBarry Smith    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1521d0295fc0SJunchao Zhang 
1522d0295fc0SJunchao Zhang    Collective
1523d0295fc0SJunchao Zhang 
15244165533cSJose E. Roman    Input Parameters:
1525d0295fc0SJunchao Zhang +  sf - star forest
1526d0295fc0SJunchao Zhang .  unit - data type
1527d0295fc0SJunchao Zhang .  leafmtype - memory type of leafdata
1528d0295fc0SJunchao Zhang .  leafdata - values to reduce
1529d0295fc0SJunchao Zhang .  rootmtype - memory type of rootdata
1530d0295fc0SJunchao Zhang -  op - reduction operation
1531d0295fc0SJunchao Zhang 
15324165533cSJose E. Roman    Output Parameter:
1533d0295fc0SJunchao Zhang .  rootdata - result of reduction of values from all leaves of each root
1534d0295fc0SJunchao Zhang 
1535d0295fc0SJunchao Zhang    Level: intermediate
1536d0295fc0SJunchao Zhang 
1537cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1538d0295fc0SJunchao Zhang @*/
1539d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1540d71ae5a4SJacob Faibussowitsch {
1541d0295fc0SJunchao Zhang   PetscFunctionBegin;
1542d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15439566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15449566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15459566063dSJacob Faibussowitsch   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
15469566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1547d0295fc0SJunchao Zhang   PetscFunctionReturn(0);
1548d0295fc0SJunchao Zhang }
1549d0295fc0SJunchao Zhang 
1550d0295fc0SJunchao Zhang /*@C
1551cab54364SBarry Smith    PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()`
155295fce210SBarry Smith 
155395fce210SBarry Smith    Collective
155495fce210SBarry Smith 
15554165533cSJose E. Roman    Input Parameters:
155695fce210SBarry Smith +  sf - star forest
155795fce210SBarry Smith .  unit - data type
155895fce210SBarry Smith .  leafdata - values to reduce
155995fce210SBarry Smith -  op - reduction operation
156095fce210SBarry Smith 
15614165533cSJose E. Roman    Output Parameter:
156295fce210SBarry Smith .  rootdata - result of reduction of values from all leaves of each root
156395fce210SBarry Smith 
156495fce210SBarry Smith    Level: intermediate
156595fce210SBarry Smith 
1566cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`
156795fce210SBarry Smith @*/
1568d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1569d71ae5a4SJacob Faibussowitsch {
157095fce210SBarry Smith   PetscFunctionBegin;
157195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15729566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1573dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
15749566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
157595fce210SBarry Smith   PetscFunctionReturn(0);
157695fce210SBarry Smith }
157795fce210SBarry Smith 
157895fce210SBarry Smith /*@C
1579cab54364SBarry Smith    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1580cab54364SBarry Smith    to be completed with `PetscSFFetchAndOpEnd()`
1581a1729e3fSJunchao Zhang 
1582a1729e3fSJunchao Zhang    Collective
1583a1729e3fSJunchao Zhang 
15844165533cSJose E. Roman    Input Parameters:
1585a1729e3fSJunchao Zhang +  sf - star forest
1586a1729e3fSJunchao Zhang .  unit - data type
1587a1729e3fSJunchao Zhang .  leafdata - leaf values to use in reduction
1588a1729e3fSJunchao Zhang -  op - operation to use for reduction
1589a1729e3fSJunchao Zhang 
15904165533cSJose E. Roman    Output Parameters:
1591a1729e3fSJunchao Zhang +  rootdata - root values to be updated, input state is seen by first process to perform an update
1592a1729e3fSJunchao Zhang -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1593a1729e3fSJunchao Zhang 
1594a1729e3fSJunchao Zhang    Level: advanced
1595a1729e3fSJunchao Zhang 
1596a1729e3fSJunchao Zhang    Note:
1597a1729e3fSJunchao Zhang    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1598a1729e3fSJunchao Zhang    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1599a1729e3fSJunchao Zhang    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1600a1729e3fSJunchao Zhang    integers.
1601a1729e3fSJunchao Zhang 
1602cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1603a1729e3fSJunchao Zhang @*/
1604d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1605d71ae5a4SJacob Faibussowitsch {
1606eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1607a1729e3fSJunchao Zhang 
1608a1729e3fSJunchao Zhang   PetscFunctionBegin;
1609a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16109566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16119566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16129566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
16139566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
16149566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
161508401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1616dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
16179566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1618a1729e3fSJunchao Zhang   PetscFunctionReturn(0);
1619a1729e3fSJunchao Zhang }
1620a1729e3fSJunchao Zhang 
1621a1729e3fSJunchao Zhang /*@C
1622cab54364SBarry Smith    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1623cab54364SBarry Smith    applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1624d3b3e55cSJunchao Zhang 
1625d3b3e55cSJunchao Zhang    Collective
1626d3b3e55cSJunchao Zhang 
1627d3b3e55cSJunchao Zhang    Input Parameters:
1628d3b3e55cSJunchao Zhang +  sf - star forest
1629d3b3e55cSJunchao Zhang .  unit - data type
1630d3b3e55cSJunchao Zhang .  rootmtype - memory type of rootdata
1631d3b3e55cSJunchao Zhang .  leafmtype - memory type of leafdata
1632d3b3e55cSJunchao Zhang .  leafdata - leaf values to use in reduction
1633d3b3e55cSJunchao Zhang .  leafupdatemtype - memory type of leafupdate
1634d3b3e55cSJunchao Zhang -  op - operation to use for reduction
1635d3b3e55cSJunchao Zhang 
1636d3b3e55cSJunchao Zhang    Output Parameters:
1637d3b3e55cSJunchao Zhang +  rootdata - root values to be updated, input state is seen by first process to perform an update
1638d3b3e55cSJunchao Zhang -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1639d3b3e55cSJunchao Zhang 
1640d3b3e55cSJunchao Zhang    Level: advanced
1641d3b3e55cSJunchao Zhang 
1642cab54364SBarry Smith    Note:
1643cab54364SBarry Smith    See `PetscSFFetchAndOpBegin()` for more details.
1644d3b3e55cSJunchao Zhang 
1645cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1646d3b3e55cSJunchao Zhang @*/
1647d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1648d71ae5a4SJacob Faibussowitsch {
1649d3b3e55cSJunchao Zhang   PetscFunctionBegin;
1650d3b3e55cSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16519566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16529566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
165308401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1654dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
16559566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1656d3b3e55cSJunchao Zhang   PetscFunctionReturn(0);
1657d3b3e55cSJunchao Zhang }
1658d3b3e55cSJunchao Zhang 
1659d3b3e55cSJunchao Zhang /*@C
1660a1729e3fSJunchao Zhang    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1661a1729e3fSJunchao Zhang 
1662a1729e3fSJunchao Zhang    Collective
1663a1729e3fSJunchao Zhang 
16644165533cSJose E. Roman    Input Parameters:
1665a1729e3fSJunchao Zhang +  sf - star forest
1666a1729e3fSJunchao Zhang .  unit - data type
1667a1729e3fSJunchao Zhang .  leafdata - leaf values to use in reduction
1668a1729e3fSJunchao Zhang -  op - operation to use for reduction
1669a1729e3fSJunchao Zhang 
16704165533cSJose E. Roman    Output Parameters:
1671a1729e3fSJunchao Zhang +  rootdata - root values to be updated, input state is seen by first process to perform an update
1672a1729e3fSJunchao Zhang -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1673a1729e3fSJunchao Zhang 
1674a1729e3fSJunchao Zhang    Level: advanced
1675a1729e3fSJunchao Zhang 
1676cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
1677a1729e3fSJunchao Zhang @*/
1678d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1679d71ae5a4SJacob Faibussowitsch {
1680a1729e3fSJunchao Zhang   PetscFunctionBegin;
1681a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16829566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1683dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
16849566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1685a1729e3fSJunchao Zhang   PetscFunctionReturn(0);
1686a1729e3fSJunchao Zhang }
1687a1729e3fSJunchao Zhang 
1688a1729e3fSJunchao Zhang /*@C
1689cab54364SBarry Smith    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
169095fce210SBarry Smith 
169195fce210SBarry Smith    Collective
169295fce210SBarry Smith 
16934165533cSJose E. Roman    Input Parameter:
169495fce210SBarry Smith .  sf - star forest
169595fce210SBarry Smith 
16964165533cSJose E. Roman    Output Parameter:
169795fce210SBarry Smith .  degree - degree of each root vertex
169895fce210SBarry Smith 
169995fce210SBarry Smith    Level: advanced
170095fce210SBarry Smith 
1701cab54364SBarry Smith    Note:
1702cab54364SBarry Smith    The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.
1703ffe67aa5SVáclav Hapla 
1704cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
170595fce210SBarry Smith @*/
1706d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1707d71ae5a4SJacob Faibussowitsch {
170895fce210SBarry Smith   PetscFunctionBegin;
170995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
171095fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
171195fce210SBarry Smith   PetscValidPointer(degree, 2);
1712803bd9e8SMatthew G. Knepley   if (!sf->degreeknown) {
17135b0d146aSStefano Zampini     PetscInt i, nroots = sf->nroots, maxlocal;
171428b400f6SJacob Faibussowitsch     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
17155b0d146aSStefano Zampini     maxlocal = sf->maxleaf - sf->minleaf + 1;
17169566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nroots, &sf->degree));
17179566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
171829046d53SLisandro Dalcin     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
17199837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
17209566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
172195fce210SBarry Smith   }
172295fce210SBarry Smith   *degree = NULL;
172395fce210SBarry Smith   PetscFunctionReturn(0);
172495fce210SBarry Smith }
172595fce210SBarry Smith 
172695fce210SBarry Smith /*@C
1727cab54364SBarry Smith    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
172895fce210SBarry Smith 
172995fce210SBarry Smith    Collective
173095fce210SBarry Smith 
17314165533cSJose E. Roman    Input Parameter:
173295fce210SBarry Smith .  sf - star forest
173395fce210SBarry Smith 
17344165533cSJose E. Roman    Output Parameter:
173595fce210SBarry Smith .  degree - degree of each root vertex
173695fce210SBarry Smith 
173795fce210SBarry Smith    Level: developer
173895fce210SBarry Smith 
1739cab54364SBarry Smith    Note:
1740cab54364SBarry Smith    The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.
1741ffe67aa5SVáclav Hapla 
1742cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
174395fce210SBarry Smith @*/
1744d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1745d71ae5a4SJacob Faibussowitsch {
174695fce210SBarry Smith   PetscFunctionBegin;
174795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
174895fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
174929046d53SLisandro Dalcin   PetscValidPointer(degree, 2);
175095fce210SBarry Smith   if (!sf->degreeknown) {
175128b400f6SJacob Faibussowitsch     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
17529566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
17539566063dSJacob Faibussowitsch     PetscCall(PetscFree(sf->degreetmp));
175495fce210SBarry Smith     sf->degreeknown = PETSC_TRUE;
175595fce210SBarry Smith   }
175695fce210SBarry Smith   *degree = sf->degree;
175795fce210SBarry Smith   PetscFunctionReturn(0);
175895fce210SBarry Smith }
175995fce210SBarry Smith 
1760673100f5SVaclav Hapla /*@C
1761cab54364SBarry Smith    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by `PetscSFGetMultiSF()`).
176266dfcd1aSVaclav Hapla    Each multi-root is assigned index of the corresponding original root.
1763673100f5SVaclav Hapla 
1764673100f5SVaclav Hapla    Collective
1765673100f5SVaclav Hapla 
17664165533cSJose E. Roman    Input Parameters:
1767673100f5SVaclav Hapla +  sf - star forest
1768cab54364SBarry Smith -  degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1769673100f5SVaclav Hapla 
17704165533cSJose E. Roman    Output Parameters:
177166dfcd1aSVaclav Hapla +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
177266dfcd1aSVaclav Hapla -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1773673100f5SVaclav Hapla 
1774673100f5SVaclav Hapla    Level: developer
1775673100f5SVaclav Hapla 
1776cab54364SBarry Smith    Note:
1777cab54364SBarry Smith    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1778ffe67aa5SVáclav Hapla 
1779cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1780673100f5SVaclav Hapla @*/
1781d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1782d71ae5a4SJacob Faibussowitsch {
1783673100f5SVaclav Hapla   PetscSF  msf;
1784673100f5SVaclav Hapla   PetscInt i, j, k, nroots, nmroots;
1785673100f5SVaclav Hapla 
1786673100f5SVaclav Hapla   PetscFunctionBegin;
1787673100f5SVaclav Hapla   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17889566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
178929328920SVaclav Hapla   if (nroots) PetscValidIntPointer(degree, 2);
179066dfcd1aSVaclav Hapla   if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3);
179166dfcd1aSVaclav Hapla   PetscValidPointer(multiRootsOrigNumbering, 4);
17929566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &msf));
17939566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
17949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1795673100f5SVaclav Hapla   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1796673100f5SVaclav Hapla     if (!degree[i]) continue;
1797ad540459SPierre Jolivet     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1798673100f5SVaclav Hapla   }
179908401ef6SPierre Jolivet   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
180066dfcd1aSVaclav Hapla   if (nMultiRoots) *nMultiRoots = nmroots;
1801673100f5SVaclav Hapla   PetscFunctionReturn(0);
1802673100f5SVaclav Hapla }
1803673100f5SVaclav Hapla 
180495fce210SBarry Smith /*@C
1805cab54364SBarry Smith    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
180695fce210SBarry Smith 
180795fce210SBarry Smith    Collective
180895fce210SBarry Smith 
18094165533cSJose E. Roman    Input Parameters:
181095fce210SBarry Smith +  sf - star forest
181195fce210SBarry Smith .  unit - data type
181295fce210SBarry Smith -  leafdata - leaf data to gather to roots
181395fce210SBarry Smith 
18144165533cSJose E. Roman    Output Parameter:
181595fce210SBarry Smith .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
181695fce210SBarry Smith 
181795fce210SBarry Smith    Level: intermediate
181895fce210SBarry Smith 
1819cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
182095fce210SBarry Smith @*/
1821d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1822d71ae5a4SJacob Faibussowitsch {
1823a5526d50SJunchao Zhang   PetscSF multi = NULL;
182495fce210SBarry Smith 
182595fce210SBarry Smith   PetscFunctionBegin;
182695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18279566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
18289566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
18299566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
183095fce210SBarry Smith   PetscFunctionReturn(0);
183195fce210SBarry Smith }
183295fce210SBarry Smith 
183395fce210SBarry Smith /*@C
1834cab54364SBarry Smith    PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
183595fce210SBarry Smith 
183695fce210SBarry Smith    Collective
183795fce210SBarry Smith 
18384165533cSJose E. Roman    Input Parameters:
183995fce210SBarry Smith +  sf - star forest
184095fce210SBarry Smith .  unit - data type
184195fce210SBarry Smith -  leafdata - leaf data to gather to roots
184295fce210SBarry Smith 
18434165533cSJose E. Roman    Output Parameter:
184495fce210SBarry Smith .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
184595fce210SBarry Smith 
184695fce210SBarry Smith    Level: intermediate
184795fce210SBarry Smith 
1848cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
184995fce210SBarry Smith @*/
1850d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1851d71ae5a4SJacob Faibussowitsch {
1852a5526d50SJunchao Zhang   PetscSF multi = NULL;
185395fce210SBarry Smith 
185495fce210SBarry Smith   PetscFunctionBegin;
185595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18569566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
18579566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
185895fce210SBarry Smith   PetscFunctionReturn(0);
185995fce210SBarry Smith }
186095fce210SBarry Smith 
186195fce210SBarry Smith /*@C
1862cab54364SBarry Smith    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
186395fce210SBarry Smith 
186495fce210SBarry Smith    Collective
186595fce210SBarry Smith 
18664165533cSJose E. Roman    Input Parameters:
186795fce210SBarry Smith +  sf - star forest
186895fce210SBarry Smith .  unit - data type
186995fce210SBarry Smith -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
187095fce210SBarry Smith 
18714165533cSJose E. Roman    Output Parameter:
187295fce210SBarry Smith .  leafdata - leaf data to be update with personal data from each respective root
187395fce210SBarry Smith 
187495fce210SBarry Smith    Level: intermediate
187595fce210SBarry Smith 
1876cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
187795fce210SBarry Smith @*/
1878d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1879d71ae5a4SJacob Faibussowitsch {
1880a5526d50SJunchao Zhang   PetscSF multi = NULL;
188195fce210SBarry Smith 
188295fce210SBarry Smith   PetscFunctionBegin;
188395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18849566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
18859566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
18869566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
188795fce210SBarry Smith   PetscFunctionReturn(0);
188895fce210SBarry Smith }
188995fce210SBarry Smith 
189095fce210SBarry Smith /*@C
1891cab54364SBarry Smith    PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
189295fce210SBarry Smith 
189395fce210SBarry Smith    Collective
189495fce210SBarry Smith 
18954165533cSJose E. Roman    Input Parameters:
189695fce210SBarry Smith +  sf - star forest
189795fce210SBarry Smith .  unit - data type
189895fce210SBarry Smith -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
189995fce210SBarry Smith 
19004165533cSJose E. Roman    Output Parameter:
190195fce210SBarry Smith .  leafdata - leaf data to be update with personal data from each respective root
190295fce210SBarry Smith 
190395fce210SBarry Smith    Level: intermediate
190495fce210SBarry Smith 
1905cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
190695fce210SBarry Smith @*/
1907d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1908d71ae5a4SJacob Faibussowitsch {
1909a5526d50SJunchao Zhang   PetscSF multi = NULL;
191095fce210SBarry Smith 
191195fce210SBarry Smith   PetscFunctionBegin;
191295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19139566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19149566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
191595fce210SBarry Smith   PetscFunctionReturn(0);
191695fce210SBarry Smith }
1917a7b3aa13SAta Mesgarnejad 
1918d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1919d71ae5a4SJacob Faibussowitsch {
1920a072220fSLawrence Mitchell   PetscInt        i, n, nleaves;
1921a072220fSLawrence Mitchell   const PetscInt *ilocal = NULL;
1922a072220fSLawrence Mitchell   PetscHSetI      seen;
1923a072220fSLawrence Mitchell 
1924a072220fSLawrence Mitchell   PetscFunctionBegin;
1925b458e8f1SJose E. Roman   if (PetscDefined(USE_DEBUG)) {
19269566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
19279566063dSJacob Faibussowitsch     PetscCall(PetscHSetICreate(&seen));
1928a072220fSLawrence Mitchell     for (i = 0; i < nleaves; i++) {
1929a072220fSLawrence Mitchell       const PetscInt leaf = ilocal ? ilocal[i] : i;
19309566063dSJacob Faibussowitsch       PetscCall(PetscHSetIAdd(seen, leaf));
1931a072220fSLawrence Mitchell     }
19329566063dSJacob Faibussowitsch     PetscCall(PetscHSetIGetSize(seen, &n));
193308401ef6SPierre Jolivet     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
19349566063dSJacob Faibussowitsch     PetscCall(PetscHSetIDestroy(&seen));
1935b458e8f1SJose E. Roman   }
1936a072220fSLawrence Mitchell   PetscFunctionReturn(0);
1937a072220fSLawrence Mitchell }
193854729392SStefano Zampini 
1939a7b3aa13SAta Mesgarnejad /*@
1940cab54364SBarry Smith   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
1941a7b3aa13SAta Mesgarnejad 
1942a7b3aa13SAta Mesgarnejad   Input Parameters:
1943cab54364SBarry Smith + sfA - The first `PetscSF`
1944cab54364SBarry Smith - sfB - The second `PetscSF`
1945a7b3aa13SAta Mesgarnejad 
1946a7b3aa13SAta Mesgarnejad   Output Parameters:
1947cab54364SBarry Smith . sfBA - The composite `PetscSF`
1948a7b3aa13SAta Mesgarnejad 
1949a7b3aa13SAta Mesgarnejad   Level: developer
1950a7b3aa13SAta Mesgarnejad 
1951a072220fSLawrence Mitchell   Notes:
1952cab54364SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
195354729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots.
195454729392SStefano Zampini 
195554729392SStefano Zampini   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
195654729392SStefano Zampini   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
195754729392SStefano Zampini   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
195854729392SStefano Zampini   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1959a072220fSLawrence Mitchell 
1960db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1961a7b3aa13SAta Mesgarnejad @*/
1962d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1963d71ae5a4SJacob Faibussowitsch {
1964a7b3aa13SAta Mesgarnejad   const PetscSFNode *remotePointsA, *remotePointsB;
1965d41018fbSJunchao Zhang   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
196654729392SStefano Zampini   const PetscInt    *localPointsA, *localPointsB;
196754729392SStefano Zampini   PetscInt          *localPointsBA;
196854729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
196954729392SStefano Zampini   PetscBool          denseB;
1970a7b3aa13SAta Mesgarnejad 
1971a7b3aa13SAta Mesgarnejad   PetscFunctionBegin;
1972a7b3aa13SAta Mesgarnejad   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
197329046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfA, 1);
197429046d53SLisandro Dalcin   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
197529046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfB, 2);
197654729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
197729046d53SLisandro Dalcin   PetscValidPointer(sfBA, 3);
19789566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
19799566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
198054729392SStefano Zampini 
19819566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
19829566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
19830ea77edaSksagiyam   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size       */
19840ea77edaSksagiyam   /* numRootsB; otherwise, garbage will be broadcasted.                                   */
19850ea77edaSksagiyam   /* Example (comm size = 1):                                                             */
19860ea77edaSksagiyam   /* sfA: 0 <- (0, 0)                                                                     */
19870ea77edaSksagiyam   /* sfB: 100 <- (0, 0)                                                                   */
19880ea77edaSksagiyam   /*      101 <- (0, 1)                                                                   */
19890ea77edaSksagiyam   /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget  */
19900ea77edaSksagiyam   /* of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would */
19910ea77edaSksagiyam   /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on  */
19920ea77edaSksagiyam   /* remotePointsA; if not recasted, point 101 would receive a garbage value.             */
19939566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
199454729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
199554729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
199654729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
199754729392SStefano Zampini   }
199854729392SStefano Zampini   for (i = 0; i < numLeavesA; i++) {
19990ea77edaSksagiyam     PetscInt localp = localPointsA ? localPointsA[i] : i;
20000ea77edaSksagiyam 
20010ea77edaSksagiyam     if (localp >= numRootsB) continue;
20020ea77edaSksagiyam     reorderedRemotePointsA[localp] = remotePointsA[i];
200354729392SStefano Zampini   }
2004d41018fbSJunchao Zhang   remotePointsA = reorderedRemotePointsA;
20059566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
20069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
20070ea77edaSksagiyam   for (i = 0; i < maxleaf - minleaf + 1; i++) {
20080ea77edaSksagiyam     leafdataB[i].rank  = -1;
20090ea77edaSksagiyam     leafdataB[i].index = -1;
20100ea77edaSksagiyam   }
20119566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
20129566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
20139566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
2014d41018fbSJunchao Zhang 
201554729392SStefano Zampini   denseB = (PetscBool)!localPointsB;
201654729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
201754729392SStefano Zampini     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
201854729392SStefano Zampini     else numLeavesBA++;
201954729392SStefano Zampini   }
202054729392SStefano Zampini   if (denseB) {
2021d41018fbSJunchao Zhang     localPointsBA  = NULL;
2022d41018fbSJunchao Zhang     remotePointsBA = leafdataB;
2023d41018fbSJunchao Zhang   } else {
20249566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
20259566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
202654729392SStefano Zampini     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
202754729392SStefano Zampini       const PetscInt l = localPointsB ? localPointsB[i] : i;
202854729392SStefano Zampini 
202954729392SStefano Zampini       if (leafdataB[l - minleaf].rank == -1) continue;
203054729392SStefano Zampini       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
203154729392SStefano Zampini       localPointsBA[numLeavesBA]  = l;
203254729392SStefano Zampini       numLeavesBA++;
203354729392SStefano Zampini     }
20349566063dSJacob Faibussowitsch     PetscCall(PetscFree(leafdataB));
2035d41018fbSJunchao Zhang   }
20369566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
20379566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
20389566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2039a7b3aa13SAta Mesgarnejad   PetscFunctionReturn(0);
2040a7b3aa13SAta Mesgarnejad }
20411c6ba672SJunchao Zhang 
204204c0ada0SJunchao Zhang /*@
2043cab54364SBarry Smith   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
204404c0ada0SJunchao Zhang 
204504c0ada0SJunchao Zhang   Input Parameters:
2046cab54364SBarry Smith + sfA - The first `PetscSF`
2047cab54364SBarry Smith - sfB - The second `PetscSF`
204804c0ada0SJunchao Zhang 
204904c0ada0SJunchao Zhang   Output Parameters:
2050cab54364SBarry Smith . sfBA - The composite `PetscSF`.
205104c0ada0SJunchao Zhang 
205204c0ada0SJunchao Zhang   Level: developer
205304c0ada0SJunchao Zhang 
205454729392SStefano Zampini   Notes:
205554729392SStefano Zampini   Currently, the two SFs must be defined on congruent communicators and they must be true star
205654729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
205754729392SStefano Zampini   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
205854729392SStefano Zampini 
205954729392SStefano Zampini   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
206054729392SStefano Zampini   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
206154729392SStefano Zampini   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
206254729392SStefano Zampini   a Reduce on sfB, on connected roots.
206354729392SStefano Zampini 
2064db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
206504c0ada0SJunchao Zhang @*/
2066d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2067d71ae5a4SJacob Faibussowitsch {
206804c0ada0SJunchao Zhang   const PetscSFNode *remotePointsA, *remotePointsB;
206904c0ada0SJunchao Zhang   PetscSFNode       *remotePointsBA;
207004c0ada0SJunchao Zhang   const PetscInt    *localPointsA, *localPointsB;
207154729392SStefano Zampini   PetscSFNode       *reorderedRemotePointsA = NULL;
207254729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
20735b0d146aSStefano Zampini   MPI_Op             op;
20745b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
20755b0d146aSStefano Zampini   PetscBool iswin;
20765b0d146aSStefano Zampini #endif
207704c0ada0SJunchao Zhang 
207804c0ada0SJunchao Zhang   PetscFunctionBegin;
207904c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
208004c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfA, 1);
208104c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
208204c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfB, 2);
208354729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
208404c0ada0SJunchao Zhang   PetscValidPointer(sfBA, 3);
20859566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
20869566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
208754729392SStefano Zampini 
20889566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
20899566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
20905b0d146aSStefano Zampini 
20915b0d146aSStefano Zampini   /* TODO: Check roots of sfB have degree of 1 */
20925b0d146aSStefano Zampini   /* Once we implement it, we can replace the MPI_MAXLOC
209383df288dSJunchao Zhang      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
20945b0d146aSStefano Zampini      We use MPI_MAXLOC only to have a deterministic output from this routine if
20955b0d146aSStefano Zampini      the root condition is not meet.
20965b0d146aSStefano Zampini    */
20975b0d146aSStefano Zampini   op = MPI_MAXLOC;
20985b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
20995b0d146aSStefano Zampini   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
21009566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
210183df288dSJunchao Zhang   if (iswin) op = MPI_REPLACE;
21025b0d146aSStefano Zampini #endif
21035b0d146aSStefano Zampini 
21049566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
21059566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
210654729392SStefano Zampini   for (i = 0; i < maxleaf - minleaf + 1; i++) {
210754729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
210854729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
210954729392SStefano Zampini   }
211054729392SStefano Zampini   if (localPointsA) {
211154729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
211254729392SStefano Zampini       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
211354729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
211454729392SStefano Zampini     }
211554729392SStefano Zampini   } else {
211654729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
211754729392SStefano Zampini       if (i > maxleaf || i < minleaf) continue;
211854729392SStefano Zampini       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
211954729392SStefano Zampini     }
212054729392SStefano Zampini   }
212154729392SStefano Zampini 
21229566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
21239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
212454729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
212554729392SStefano Zampini     remotePointsBA[i].rank  = -1;
212654729392SStefano Zampini     remotePointsBA[i].index = -1;
212754729392SStefano Zampini   }
212854729392SStefano Zampini 
21299566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
21309566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
21319566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
213254729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
213354729392SStefano Zampini     if (remotePointsBA[i].rank == -1) continue;
213454729392SStefano Zampini     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
213554729392SStefano Zampini     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
213654729392SStefano Zampini     localPointsBA[numLeavesBA]        = i;
213754729392SStefano Zampini     numLeavesBA++;
213854729392SStefano Zampini   }
21399566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
21409566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
21419566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
214204c0ada0SJunchao Zhang   PetscFunctionReturn(0);
214304c0ada0SJunchao Zhang }
214404c0ada0SJunchao Zhang 
21451c6ba672SJunchao Zhang /*
2146cab54364SBarry Smith   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
21471c6ba672SJunchao Zhang 
21481c6ba672SJunchao Zhang   Input Parameters:
2149cab54364SBarry Smith . sf - The global `PetscSF`
21501c6ba672SJunchao Zhang 
21511c6ba672SJunchao Zhang   Output Parameters:
2152cab54364SBarry Smith . out - The local `PetscSF`
2153cab54364SBarry Smith 
2154cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`
21551c6ba672SJunchao Zhang  */
2156d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2157d71ae5a4SJacob Faibussowitsch {
21581c6ba672SJunchao Zhang   MPI_Comm           comm;
21591c6ba672SJunchao Zhang   PetscMPIInt        myrank;
21601c6ba672SJunchao Zhang   const PetscInt    *ilocal;
21611c6ba672SJunchao Zhang   const PetscSFNode *iremote;
21621c6ba672SJunchao Zhang   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
21631c6ba672SJunchao Zhang   PetscSFNode       *liremote;
21641c6ba672SJunchao Zhang   PetscSF            lsf;
21651c6ba672SJunchao Zhang 
21661c6ba672SJunchao Zhang   PetscFunctionBegin;
21671c6ba672SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2168dbbe0bcdSBarry Smith   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2169dbbe0bcdSBarry Smith   else {
21701c6ba672SJunchao Zhang     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
21719566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
21729566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
21731c6ba672SJunchao Zhang 
21741c6ba672SJunchao Zhang     /* Find out local edges and build a local SF */
21759566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
21769371c9d4SSatish Balay     for (i = lnleaves = 0; i < nleaves; i++) {
21779371c9d4SSatish Balay       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
21789371c9d4SSatish Balay     }
21799566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &lilocal));
21809566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &liremote));
21811c6ba672SJunchao Zhang 
21821c6ba672SJunchao Zhang     for (i = j = 0; i < nleaves; i++) {
21831c6ba672SJunchao Zhang       if (iremote[i].rank == (PetscInt)myrank) {
21841c6ba672SJunchao Zhang         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
21851c6ba672SJunchao Zhang         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
21861c6ba672SJunchao Zhang         liremote[j].index = iremote[i].index;
21871c6ba672SJunchao Zhang         j++;
21881c6ba672SJunchao Zhang       }
21891c6ba672SJunchao Zhang     }
21909566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
21919566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(lsf));
21929566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
21939566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(lsf));
21941c6ba672SJunchao Zhang     *out = lsf;
21951c6ba672SJunchao Zhang   }
21961c6ba672SJunchao Zhang   PetscFunctionReturn(0);
21971c6ba672SJunchao Zhang }
2198dd5b3ca6SJunchao Zhang 
2199dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2200d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2201d71ae5a4SJacob Faibussowitsch {
2202eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
2203dd5b3ca6SJunchao Zhang 
2204dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
2205dd5b3ca6SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
22069566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
22079566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
22089566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
22099566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2210dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
22119566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2212dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
2213dd5b3ca6SJunchao Zhang }
2214dd5b3ca6SJunchao Zhang 
2215157edd7aSVaclav Hapla /*@
2216cab54364SBarry Smith   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2217157edd7aSVaclav Hapla 
2218157edd7aSVaclav Hapla   Input Parameters:
2219157edd7aSVaclav Hapla + comm - the communicator
2220cab54364SBarry Smith . nsfs - the number of input `PetscSF`
2221cab54364SBarry Smith . sfs  - the array of input `PetscSF`
2222cab54364SBarry Smith . shareRoots - the flag whether roots of input `PetscSF` are taken as shared (`PETSC_TRUE`), or separate and concatenated (`PETSC_FALSE`)
2223cab54364SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or NULL for contiguous storage
2224157edd7aSVaclav Hapla 
2225157edd7aSVaclav Hapla   Output Parameters:
2226cab54364SBarry Smith . newsf - The resulting `PetscSF`
2227157edd7aSVaclav Hapla 
2228157edd7aSVaclav Hapla   Level: developer
2229157edd7aSVaclav Hapla 
2230157edd7aSVaclav Hapla   Notes:
2231157edd7aSVaclav Hapla   The communicator of all SFs in sfs must be comm.
2232157edd7aSVaclav Hapla 
2233157edd7aSVaclav Hapla   The offsets in leafOffsets are added to the original leaf indices.
2234157edd7aSVaclav Hapla 
2235157edd7aSVaclav Hapla   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2236157edd7aSVaclav Hapla   In this case, NULL is also passed as ilocal to the resulting SF.
2237157edd7aSVaclav Hapla 
2238157edd7aSVaclav Hapla   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2239157edd7aSVaclav Hapla   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2240157edd7aSVaclav Hapla 
2241db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2242157edd7aSVaclav Hapla @*/
2243d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2244d71ae5a4SJacob Faibussowitsch {
2245157edd7aSVaclav Hapla   PetscInt     i, s, nLeaves, nRoots;
2246157edd7aSVaclav Hapla   PetscInt    *leafArrayOffsets;
2247157edd7aSVaclav Hapla   PetscInt    *ilocal_new;
2248157edd7aSVaclav Hapla   PetscSFNode *iremote_new;
2249157edd7aSVaclav Hapla   PetscInt    *rootOffsets;
2250157edd7aSVaclav Hapla   PetscBool    all_ilocal_null = PETSC_FALSE;
2251157edd7aSVaclav Hapla   PetscMPIInt  rank;
2252157edd7aSVaclav Hapla 
2253157edd7aSVaclav Hapla   PetscFunctionBegin;
2254157edd7aSVaclav Hapla   {
2255157edd7aSVaclav Hapla     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2256157edd7aSVaclav Hapla 
22579566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, &dummy));
2258157edd7aSVaclav Hapla     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
2259157edd7aSVaclav Hapla     PetscValidPointer(sfs, 3);
2260157edd7aSVaclav Hapla     for (i = 0; i < nsfs; i++) {
2261157edd7aSVaclav Hapla       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2262157edd7aSVaclav Hapla       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2263157edd7aSVaclav Hapla     }
2264157edd7aSVaclav Hapla     PetscValidLogicalCollectiveBool(dummy, shareRoots, 4);
2265157edd7aSVaclav Hapla     if (leafOffsets) PetscValidIntPointer(leafOffsets, 5);
2266157edd7aSVaclav Hapla     PetscValidPointer(newsf, 6);
22679566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&dummy));
2268157edd7aSVaclav Hapla   }
2269157edd7aSVaclav Hapla   if (!nsfs) {
22709566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
22719566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2272157edd7aSVaclav Hapla     PetscFunctionReturn(0);
2273157edd7aSVaclav Hapla   }
22749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2275157edd7aSVaclav Hapla 
22769566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nsfs + 1, &rootOffsets));
2277157edd7aSVaclav Hapla   if (shareRoots) {
22789566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2279157edd7aSVaclav Hapla     if (PetscDefined(USE_DEBUG)) {
2280157edd7aSVaclav Hapla       for (s = 1; s < nsfs; s++) {
2281157edd7aSVaclav Hapla         PetscInt nr;
2282157edd7aSVaclav Hapla 
22839566063dSJacob Faibussowitsch         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2284157edd7aSVaclav Hapla         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
2285157edd7aSVaclav Hapla       }
2286157edd7aSVaclav Hapla     }
2287157edd7aSVaclav Hapla   } else {
2288157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2289157edd7aSVaclav Hapla       PetscInt nr;
2290157edd7aSVaclav Hapla 
22919566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2292157edd7aSVaclav Hapla       rootOffsets[s + 1] = rootOffsets[s] + nr;
2293157edd7aSVaclav Hapla     }
2294157edd7aSVaclav Hapla     nRoots = rootOffsets[nsfs];
2295157edd7aSVaclav Hapla   }
2296157edd7aSVaclav Hapla 
2297157edd7aSVaclav Hapla   /* Calculate leaf array offsets and automatic root offsets */
22989566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2299157edd7aSVaclav Hapla   leafArrayOffsets[0] = 0;
2300157edd7aSVaclav Hapla   for (s = 0; s < nsfs; s++) {
2301157edd7aSVaclav Hapla     PetscInt nl;
2302157edd7aSVaclav Hapla 
23039566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2304157edd7aSVaclav Hapla     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2305157edd7aSVaclav Hapla   }
2306157edd7aSVaclav Hapla   nLeaves = leafArrayOffsets[nsfs];
2307157edd7aSVaclav Hapla 
2308157edd7aSVaclav Hapla   if (!leafOffsets) {
2309157edd7aSVaclav Hapla     all_ilocal_null = PETSC_TRUE;
2310157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2311157edd7aSVaclav Hapla       const PetscInt *ilocal;
2312157edd7aSVaclav Hapla 
23139566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2314157edd7aSVaclav Hapla       if (ilocal) {
2315157edd7aSVaclav Hapla         all_ilocal_null = PETSC_FALSE;
2316157edd7aSVaclav Hapla         break;
2317157edd7aSVaclav Hapla       }
2318157edd7aSVaclav Hapla     }
2319157edd7aSVaclav Hapla     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2320157edd7aSVaclav Hapla   }
2321157edd7aSVaclav Hapla 
2322157edd7aSVaclav Hapla   /* Renumber and concatenate local leaves */
2323157edd7aSVaclav Hapla   ilocal_new = NULL;
2324157edd7aSVaclav Hapla   if (!all_ilocal_null) {
23259566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2326157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2327157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2328157edd7aSVaclav Hapla       const PetscInt *ilocal;
2329157edd7aSVaclav Hapla       PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2330157edd7aSVaclav Hapla       PetscInt        i, nleaves_l;
2331157edd7aSVaclav Hapla 
23329566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2333157edd7aSVaclav Hapla       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2334157edd7aSVaclav Hapla     }
2335157edd7aSVaclav Hapla   }
2336157edd7aSVaclav Hapla 
2337157edd7aSVaclav Hapla   /* Renumber and concatenate remote roots */
23389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2339157edd7aSVaclav Hapla   for (i = 0; i < nLeaves; i++) {
2340157edd7aSVaclav Hapla     iremote_new[i].rank  = -1;
2341157edd7aSVaclav Hapla     iremote_new[i].index = -1;
2342157edd7aSVaclav Hapla   }
2343157edd7aSVaclav Hapla   for (s = 0; s < nsfs; s++) {
2344157edd7aSVaclav Hapla     PetscInt           i, nl, nr;
2345157edd7aSVaclav Hapla     PetscSF            tmp_sf;
2346157edd7aSVaclav Hapla     const PetscSFNode *iremote;
2347157edd7aSVaclav Hapla     PetscSFNode       *tmp_rootdata;
2348157edd7aSVaclav Hapla     PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2349157edd7aSVaclav Hapla 
23509566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
23519566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, &tmp_sf));
2352157edd7aSVaclav Hapla     /* create helper SF with contiguous leaves */
23539566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
23549566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(tmp_sf));
23559566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2356157edd7aSVaclav Hapla     for (i = 0; i < nr; i++) {
2357157edd7aSVaclav Hapla       tmp_rootdata[i].index = i + rootOffsets[s];
2358157edd7aSVaclav Hapla       tmp_rootdata[i].rank  = (PetscInt)rank;
2359157edd7aSVaclav Hapla     }
23609566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
23619566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
23629566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&tmp_sf));
23639566063dSJacob Faibussowitsch     PetscCall(PetscFree(tmp_rootdata));
2364157edd7aSVaclav Hapla   }
2365157edd7aSVaclav Hapla 
2366157edd7aSVaclav Hapla   /* Build the new SF */
23679566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, newsf));
23689566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
23699566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(*newsf));
23709566063dSJacob Faibussowitsch   PetscCall(PetscFree(rootOffsets));
23719566063dSJacob Faibussowitsch   PetscCall(PetscFree(leafArrayOffsets));
2372157edd7aSVaclav Hapla   PetscFunctionReturn(0);
2373157edd7aSVaclav Hapla }
2374