xref: /petsc/src/vec/is/sf/interface/sf.c (revision 7fd2d3dbf8105d3f2e002a5ca11f019cd0ad7420)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
5 #if defined(PETSC_HAVE_CUDA)
6   #include <cuda_runtime.h>
7 #endif
8 
9 #if defined(PETSC_HAVE_HIP)
10   #include <hip/hip_runtime.h>
11 #endif
12 
13 #if defined(PETSC_USE_DEBUG)
14 #  define PetscSFCheckGraphSet(sf,arg) do {                          \
15     if (PetscUnlikely(!(sf)->graphset))                              \
16       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
17   } while (0)
18 #else
19 #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
20 #endif
21 
22 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
23 
24 PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
25 {
26   PetscFunctionBegin;
27   PetscValidPointer(type,2);
28   *type = PETSC_MEMTYPE_HOST;
29 #if defined(PETSC_HAVE_CUDA)
30   if (PetscCUDAInitialized && data) {
31     cudaError_t                  cerr;
32     struct cudaPointerAttributes attr;
33     enum cudaMemoryType          mtype;
34     cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
35     cudaGetLastError(); /* Reset the last error */
36     #if (CUDART_VERSION < 10000)
37       mtype = attr.memoryType;
38     #else
39       mtype = attr.type;
40     #endif
41     if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
42   }
43 #endif
44 
45 #if defined(PETSC_HAVE_HIP)
46   if (PetscHIPInitialized && data) {
47     hipError_t                   cerr;
48     struct hipPointerAttribute_t attr;
49     enum hipMemoryType           mtype;
50     cerr = hipPointerGetAttributes(&attr,data); /* Do not check error since passing a host pointer returns hipErrorInvalidValue */
51     hipGetLastError(); /* Reset the last error */
52     mtype = attr.memoryType;
53     if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
54   }
55 #endif
56   PetscFunctionReturn(0);
57 }
58 
59 /*@
60    PetscSFCreate - create a star forest communication context
61 
62    Collective
63 
64    Input Arguments:
65 .  comm - communicator on which the star forest will operate
66 
67    Output Arguments:
68 .  sf - new star forest context
69 
70    Options Database Keys:
71 +  -sf_type basic     - Use MPI persistent Isend/Irecv for communication (Default)
72 .  -sf_type window    - Use MPI-3 one-sided window for communication
73 -  -sf_type neighbor  - Use MPI-3 neighborhood collectives for communication
74 
75    Level: intermediate
76 
77    Notes:
78    When the communication graph matches one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv or
79    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
80    SFs are optimized and have better performance than general SFs.
81 
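   Example Usage:
   A minimal sketch (the one-leaf graph and variable names below are illustrative, not taken from this file) in which
   every process attaches a single leaf to root 0 of rank 0:
.vb
   PetscSF        sf;
   PetscSFNode    remote;
   PetscErrorCode ierr;
   ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
   remote.rank  = 0;   /* every process attaches one leaf to root 0 of rank 0 */
   remote.index = 0;
   ierr = PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
.ve
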
82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
83 @*/
84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
85 {
86   PetscErrorCode ierr;
87   PetscSF        b;
88 
89   PetscFunctionBegin;
90   PetscValidPointer(sf,2);
91   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
92 
93   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
94 
95   b->nroots    = -1;
96   b->nleaves   = -1;
97   b->minleaf   = PETSC_MAX_INT;
98   b->maxleaf   = PETSC_MIN_INT;
99   b->nranks    = -1;
100   b->rankorder = PETSC_TRUE;
101   b->ingroup   = MPI_GROUP_NULL;
102   b->outgroup  = MPI_GROUP_NULL;
103   b->graphset  = PETSC_FALSE;
104 
105   *sf = b;
106   PetscFunctionReturn(0);
107 }
108 
109 /*@
110    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
111 
112    Collective
113 
114    Input Arguments:
115 .  sf - star forest
116 
117    Level: advanced
118 
119 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
120 @*/
121 PetscErrorCode PetscSFReset(PetscSF sf)
122 {
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
127   if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
128   sf->nroots   = -1;
129   sf->nleaves  = -1;
130   sf->minleaf  = PETSC_MAX_INT;
131   sf->maxleaf  = PETSC_MIN_INT;
132   sf->mine     = NULL;
133   sf->remote   = NULL;
134   sf->graphset = PETSC_FALSE;
135   ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
136   ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
137   sf->nranks = -1;
138   ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
139 #if defined(PETSC_HAVE_DEVICE)
140   for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
141 #endif
142   sf->degreeknown = PETSC_FALSE;
143   ierr = PetscFree(sf->degree);CHKERRQ(ierr);
144   if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
145   if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
146   ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
147   ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
148   sf->setupcalled = PETSC_FALSE;
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153    PetscSFSetType - Set the PetscSF communication implementation
154 
155    Collective on PetscSF
156 
157    Input Parameters:
158 +  sf - the PetscSF context
159 -  type - a known method
160 
161    Options Database Key:
162 .  -sf_type <type> - Sets the method; use -help for a list
163    of available methods (for instance, window, basic, neighbor)
164 
165    Notes:
166    See "include/petscsf.h" for available methods, for instance
167 +    PETSCSFWINDOW - MPI-2/3 one-sided
168 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
169 
170   Level: intermediate
171 
172 .seealso: PetscSFType, PetscSFCreate()
173 @*/
174 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
175 {
176   PetscErrorCode ierr,(*r)(PetscSF);
177   PetscBool      match;
178 
179   PetscFunctionBegin;
180   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
181   PetscValidCharPointer(type,2);
182 
183   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
184   if (match) PetscFunctionReturn(0);
185 
186   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
187   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
188   /* Destroy the previous PetscSF implementation context */
189   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
190   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
191   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
192   ierr = (*r)(sf);CHKERRQ(ierr);
193   PetscFunctionReturn(0);
194 }
195 
196 /*@C
197   PetscSFGetType - Get the PetscSF communication implementation
198 
199   Not Collective
200 
201   Input Parameter:
202 . sf  - the PetscSF context
203 
204   Output Parameter:
205 . type - the PetscSF type name
206 
207   Level: intermediate
208 
209 .seealso: PetscSFSetType(), PetscSFCreate()
210 @*/
211 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
212 {
213   PetscFunctionBegin;
214   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
215   PetscValidPointer(type,2);
216   *type = ((PetscObject)sf)->type_name;
217   PetscFunctionReturn(0);
218 }
219 
220 /*@
221    PetscSFDestroy - destroy star forest
222 
223    Collective
224 
225    Input Arguments:
226 .  sf - address of star forest
227 
228    Level: intermediate
229 
230 .seealso: PetscSFCreate(), PetscSFReset()
231 @*/
232 PetscErrorCode PetscSFDestroy(PetscSF *sf)
233 {
234   PetscErrorCode ierr;
235 
236   PetscFunctionBegin;
237   if (!*sf) PetscFunctionReturn(0);
238   PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
239   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
240   ierr = PetscSFReset(*sf);CHKERRQ(ierr);
241   if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
242   ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
247 {
248   PetscInt           i, nleaves;
249   PetscMPIInt        size;
250   const PetscInt    *ilocal;
251   const PetscSFNode *iremote;
252   PetscErrorCode     ierr;
253 
254   PetscFunctionBegin;
255   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
256   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
257   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
258   for (i = 0; i < nleaves; i++) {
259     const PetscInt rank = iremote[i].rank;
260     const PetscInt remote = iremote[i].index;
261     const PetscInt leaf = ilocal ? ilocal[i] : i;
262     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
263     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
264     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
265   }
266   PetscFunctionReturn(0);
267 }
268 
269 /*@
270    PetscSFSetUp - set up communication structures
271 
272    Collective
273 
274    Input Arguments:
275 .  sf - star forest communication object
276 
277    Level: beginner
278 
279 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
280 @*/
281 PetscErrorCode PetscSFSetUp(PetscSF sf)
282 {
283   PetscErrorCode ierr;
284 
285   PetscFunctionBegin;
286   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
287   PetscSFCheckGraphSet(sf,1);
288   if (sf->setupcalled) PetscFunctionReturn(0);
289   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
290   sf->use_gpu_aware_mpi = use_gpu_aware_mpi;
291   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
292   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
293   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
294   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
295   sf->setupcalled = PETSC_TRUE;
296   PetscFunctionReturn(0);
297 }
298 
299 /*@
300    PetscSFSetFromOptions - set PetscSF options using the options database
301 
302    Logically Collective
303 
304    Input Arguments:
305 .  sf - star forest
306 
307    Options Database Keys:
308 +  -sf_type               - implementation type, see PetscSFSetType()
309 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
310 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default CUDA stream. SF will also
311                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
312                             If true, this option only works with -use_cuda_aware_mpi 1.
313 -  -sf_use_stream_aware_mpi  - Assume the underlying MPI is CUDA-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
314                                If true, this option only works with -use_cuda_aware_mpi 1.
315 
316    Level: intermediate
317 @*/
318 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
319 {
320   PetscSFType    deft;
321   char           type[256];
322   PetscErrorCode ierr;
323   PetscBool      flg;
324 
325   PetscFunctionBegin;
326   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
327   ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
328   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
329   ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
330   ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
331   ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
332 
333 #if defined(PETSC_HAVE_DEVICE)
334   sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
335   ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
336   sf->use_stream_aware_mpi = PETSC_FALSE;
337   ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
338 #endif
339   if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
340   ierr = PetscOptionsEnd();CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 /*@
345    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
346 
347    Logically Collective
348 
349    Input Arguments:
350 +  sf - star forest
351 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
352 
353    Level: advanced
354 
355 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
356 @*/
357 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
358 {
359   PetscFunctionBegin;
360   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
361   PetscValidLogicalCollectiveBool(sf,flg,2);
362   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
363   sf->rankorder = flg;
364   PetscFunctionReturn(0);
365 }
366 
367 /*@
368    PetscSFSetGraph - Set a parallel star forest
369 
370    Collective
371 
372    Input Arguments:
373 +  sf - star forest
374 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
375 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
376 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
377             during setup in debug mode)
378 .  localmode - copy mode for ilocal
379 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
380             during setup in debug mode)
381 -  remotemode - copy mode for iremote
382 
383    Level: intermediate
384 
385    Notes:
386     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
387 
388    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
389    encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
390    needed.
391 
392    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
393    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
394 
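   Example Usage:
   A minimal sketch (the two-roots-per-rank layout is illustrative and sf is an already created PetscSF): each process
   owns 2 roots and attaches 2 contiguous leaves to the 2 roots of the next process (cyclically):
.vb
   PetscMPIInt    rank,size;
   PetscSFNode    iremote[2];
   PetscInt       i;
   PetscErrorCode ierr;
   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
   for (i=0; i<2; i++) {iremote[i].rank = (rank+1)%size; iremote[i].index = i;}
   ierr = PetscSFSetGraph(sf,2,2,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
.ve
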
395 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
396 @*/
397 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
398 {
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
403   if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
404   if (nleaves > 0) PetscValidPointer(iremote,6);
405   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
406   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
407 
408   if (sf->nroots >= 0) { /* Reset only if graph already set */
409     ierr = PetscSFReset(sf);CHKERRQ(ierr);
410   }
411 
412   ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
413 
414   sf->nroots  = nroots;
415   sf->nleaves = nleaves;
416 
417   if (nleaves && ilocal) {
418     PetscInt i;
419     PetscInt minleaf = PETSC_MAX_INT;
420     PetscInt maxleaf = PETSC_MIN_INT;
421     int      contiguous = 1;
422     for (i=0; i<nleaves; i++) {
423       minleaf = PetscMin(minleaf,ilocal[i]);
424       maxleaf = PetscMax(maxleaf,ilocal[i]);
425       contiguous &= (ilocal[i] == i);
426     }
427     sf->minleaf = minleaf;
428     sf->maxleaf = maxleaf;
429     if (contiguous) {
430       if (localmode == PETSC_OWN_POINTER) {
431         ierr = PetscFree(ilocal);CHKERRQ(ierr);
432       }
433       ilocal = NULL;
434     }
435   } else {
436     sf->minleaf = 0;
437     sf->maxleaf = nleaves - 1;
438   }
439 
440   if (ilocal) {
441     switch (localmode) {
442     case PETSC_COPY_VALUES:
443       ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
444       ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
445       sf->mine = sf->mine_alloc;
446       break;
447     case PETSC_OWN_POINTER:
448       sf->mine_alloc = (PetscInt*)ilocal;
449       sf->mine       = sf->mine_alloc;
450       break;
451     case PETSC_USE_POINTER:
452       sf->mine_alloc = NULL;
453       sf->mine       = (PetscInt*)ilocal;
454       break;
455     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
456     }
457   }
458 
459   switch (remotemode) {
460   case PETSC_COPY_VALUES:
461     ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
462     ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
463     sf->remote = sf->remote_alloc;
464     break;
465   case PETSC_OWN_POINTER:
466     sf->remote_alloc = (PetscSFNode*)iremote;
467     sf->remote       = sf->remote_alloc;
468     break;
469   case PETSC_USE_POINTER:
470     sf->remote_alloc = NULL;
471     sf->remote       = (PetscSFNode*)iremote;
472     break;
473   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
474   }
475 
476   ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
477   sf->graphset = PETSC_TRUE;
478   PetscFunctionReturn(0);
479 }
480 
481 /*@
482   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
483 
484   Collective
485 
486   Input Parameters:
487 + sf      - The PetscSF
488 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
489 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
490 
491   Notes:
492   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map,
493   and let n and N be the local and global sizes of x, respectively.
494 
495   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to a
496   sequential vector y on every rank.
497 
498   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
499   sequential vector y on rank 0.
500 
501   In the above cases, entries of x are roots and entries of y are leaves.
502 
503   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Let NP be the size of sf's communicator. The routine
504   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
505   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does
506   not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
507   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
508 
509   In this case, roots and leaves are symmetric.
510 
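  Example Usage:
  A minimal sketch (comm, n and the variable names are illustrative assumptions) that builds an allgather-style SF
  from a layout with n roots per process:
.vb
  PetscLayout    map;
  PetscSF        sf;
  PetscErrorCode ierr;
  ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr);   /* n roots on this process */
  ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
  /* A Bcast on sf now copies all N roots to the N leaves on every process */
  ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);         /* sf keeps its own reference to the layout */
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
.ve
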
511   Level: intermediate
512  @*/
513 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
514 {
515   MPI_Comm       comm;
516   PetscInt       n,N,res[2];
517   PetscMPIInt    rank,size;
518   PetscSFType    type;
519   PetscErrorCode ierr;
520 
521   PetscFunctionBegin;
522   ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
523   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
524   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
525   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
526 
527   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
528     type = PETSCSFALLTOALL;
529     ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
530     ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
531     ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
532     ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
533   } else {
534     ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
535     ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
536     res[0] = n;
537     res[1] = -n;
538     /* Check if n are same over all ranks so that we can optimize it */
539     ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
540     if (res[0] == -res[1]) { /* same n */
541       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
542     } else {
543       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
544     }
545     ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
546   }
547   ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);
548 
549   sf->pattern = pattern;
550   sf->mine    = NULL; /* Contiguous */
551 
552   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
553      Also set other easy stuff.
554    */
555   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
556     sf->nleaves      = N;
557     sf->nroots       = n;
558     sf->nranks       = size;
559     sf->minleaf      = 0;
560     sf->maxleaf      = N - 1;
561   } else if (pattern == PETSCSF_PATTERN_GATHER) {
562     sf->nleaves      = rank ? 0 : N;
563     sf->nroots       = n;
564     sf->nranks       = rank ? 0 : size;
565     sf->minleaf      = 0;
566     sf->maxleaf      = rank ? -1 : N - 1;
567   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
568     sf->nleaves      = size;
569     sf->nroots       = size;
570     sf->nranks       = size;
571     sf->minleaf      = 0;
572     sf->maxleaf      = size - 1;
573   }
574   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
575   sf->graphset = PETSC_TRUE;
576   PetscFunctionReturn(0);
577 }
578 
579 /*@
580    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
581 
582    Collective
583 
584    Input Arguments:
585 .  sf - star forest to invert
586 
587    Output Arguments:
588 .  isf - inverse of sf
589 
590    Level: advanced
591 
592    Notes:
593    All roots must have degree 1.
594 
595    The local space may be a permutation, but cannot be sparse.
596 
597 .seealso: PetscSFSetGraph()
598 @*/
599 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
600 {
601   PetscErrorCode ierr;
602   PetscMPIInt    rank;
603   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
604   const PetscInt *ilocal;
605   PetscSFNode    *roots,*leaves;
606 
607   PetscFunctionBegin;
608   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
609   PetscSFCheckGraphSet(sf,1);
610   PetscValidPointer(isf,2);
611 
612   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
613   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
614 
615   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
616   ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
617   for (i=0; i<maxlocal; i++) {
618     leaves[i].rank  = rank;
619     leaves[i].index = i;
620   }
621   for (i=0; i <nroots; i++) {
622     roots[i].rank  = -1;
623     roots[i].index = -1;
624   }
625   ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
626   ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
627 
628   /* Check whether our leaves are sparse */
629   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
630   if (count == nroots) newilocal = NULL;
631   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
632     ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
633     for (i=0,count=0; i<nroots; i++) {
634       if (roots[i].rank >= 0) {
635         newilocal[count]   = i;
636         roots[count].rank  = roots[i].rank;
637         roots[count].index = roots[i].index;
638         count++;
639       }
640     }
641   }
642 
643   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
644   ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
645   ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 /*@
650    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
651 
652    Collective
653 
654    Input Arguments:
655 +  sf - communication object to duplicate
656 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
657 
658    Output Arguments:
659 .  newsf - new communication object
660 
661    Level: beginner
662 
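   Example Usage:
   A minimal sketch (sf is an already set-up PetscSF and ierr is assumed in scope) that duplicates an SF including its graph:
.vb
   PetscSF newsf;
   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&newsf);CHKERRQ(ierr);
   /* ... use newsf independently of sf ... */
   ierr = PetscSFDestroy(&newsf);CHKERRQ(ierr);
.ve
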
663 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
664 @*/
665 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
666 {
667   PetscSFType    type;
668   PetscErrorCode ierr;
669 
670   PetscFunctionBegin;
671   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
672   PetscValidLogicalCollectiveEnum(sf,opt,2);
673   PetscValidPointer(newsf,3);
674   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
675   ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
676   if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
677   if (opt == PETSCSF_DUPLICATE_GRAPH) {
678     PetscSFCheckGraphSet(sf,1);
679     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
680       PetscInt          nroots,nleaves;
681       const PetscInt    *ilocal;
682       const PetscSFNode *iremote;
683       ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
684       ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
685     } else {
686       ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
687     }
688   }
689   if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
690   PetscFunctionReturn(0);
691 }
692 
693 /*@C
694    PetscSFGetGraph - Get the graph specifying a parallel star forest
695 
696    Not Collective
697 
698    Input Arguments:
699 .  sf - star forest
700 
701    Output Arguments:
702 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
703 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
704 .  ilocal - locations of leaves in leafdata buffers
705 -  iremote - remote locations of root vertices for each leaf on the current process
706 
707    Notes:
708    We do not currently require that the graph be set, so nroots is returned as -1 if it has not been set yet.
709 
710    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
711    want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
712 
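   Example Usage:
   A minimal sketch (sf and ierr are assumed in scope; the printing is illustrative) that inspects each leaf's remote root:
.vb
   PetscInt          nroots,nleaves,leaf,i;
   const PetscInt    *ilocal;
   const PetscSFNode *iremote;
   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
   for (i=0; i<nleaves; i++) {
     leaf = ilocal ? ilocal[i] : i;   /* NULL ilocal means leaves are numbered contiguously from 0 */
     ierr = PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (%D,%D)\n",leaf,iremote[i].rank,iremote[i].index);CHKERRQ(ierr);
   }
.ve
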
713    Level: intermediate
714 
715 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
716 @*/
717 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
718 {
719   PetscErrorCode ierr;
720 
721   PetscFunctionBegin;
722   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
723   if (sf->ops->GetGraph) {
724     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
725   } else {
726     if (nroots) *nroots = sf->nroots;
727     if (nleaves) *nleaves = sf->nleaves;
728     if (ilocal) *ilocal = sf->mine;
729     if (iremote) *iremote = sf->remote;
730   }
731   PetscFunctionReturn(0);
732 }
733 
734 /*@
735    PetscSFGetLeafRange - Get the active leaf ranges
736 
737    Not Collective
738 
739    Input Arguments:
740 .  sf - star forest
741 
742    Output Arguments:
743 +  minleaf - minimum active leaf on this process; returns 0 if there are no leaves
744 -  maxleaf - maximum active leaf on this process; returns -1 if there are no leaves
745 
746    Level: developer
747 
748 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
749 @*/
750 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
751 {
752   PetscFunctionBegin;
753   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
754   PetscSFCheckGraphSet(sf,1);
755   if (minleaf) *minleaf = sf->minleaf;
756   if (maxleaf) *maxleaf = sf->maxleaf;
757   PetscFunctionReturn(0);
758 }
759 
760 /*@C
761    PetscSFViewFromOptions - View a PetscSF based on options in the options database
762 
763    Collective on PetscSF
764 
765    Input Parameters:
766 +  A - the star forest
767 .  obj - Optional object
768 -  name - command line option
769 
770    Level: intermediate
771 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
772 @*/
773 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
774 {
775   PetscErrorCode ierr;
776 
777   PetscFunctionBegin;
778   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
779   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782 
783 /*@C
784    PetscSFView - view a star forest
785 
786    Collective
787 
788    Input Arguments:
789 +  sf - star forest
790 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
791 
792    Level: beginner
793 
794 .seealso: PetscSFCreate(), PetscSFSetGraph()
795 @*/
796 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
797 {
798   PetscErrorCode    ierr;
799   PetscBool         iascii;
800   PetscViewerFormat format;
801 
802   PetscFunctionBegin;
803   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
804   if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
805   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
806   PetscCheckSameComm(sf,1,viewer,2);
807   if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
808   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
809   if (iascii) {
810     PetscMPIInt rank;
811     PetscInt    ii,i,j;
812 
813     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
814     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
815     if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
816     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
817       if (!sf->graphset) {
818         ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
819         ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
820         PetscFunctionReturn(0);
821       }
822       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
823       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
824       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
825       for (i=0; i<sf->nleaves; i++) {
826         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
827       }
828       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
829       ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
830       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
831         PetscMPIInt *tmpranks,*perm;
832         ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
833         ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
834         for (i=0; i<sf->nranks; i++) perm[i] = i;
835         ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
836         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
837         for (ii=0; ii<sf->nranks; ii++) {
838           i = perm[ii];
839           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
840           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
841             ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
842           }
843         }
844         ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
845       }
846       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
847       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
848     }
849     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 /*@C
855    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
856 
857    Not Collective
858 
859    Input Arguments:
860 .  sf - star forest
861 
862    Output Arguments:
863 +  nranks - number of ranks referenced by local part
864 .  ranks - array of ranks
865 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
866 .  rmine - concatenated array holding local indices referencing each remote rank
867 -  rremote - concatenated array holding remote indices referenced for each remote rank
868 
869    Level: developer
870 
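   Example Usage:
   A minimal sketch (sf is an already set-up PetscSF; names are illustrative) showing how roffset indexes the
   concatenated rmine/rremote arrays:
.vb
   PetscInt           nranks,r,j;
   const PetscMPIInt *ranks;
   const PetscInt    *roffset,*rmine,*rremote;
   PetscErrorCode     ierr;
   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
   for (r=0; r<nranks; r++) {
     for (j=roffset[r]; j<roffset[r+1]; j++) {
       /* local leaf rmine[j] references root rremote[j] on remote rank ranks[r] */
       ierr = PetscPrintf(PETSC_COMM_SELF,"leaf %D -> root %D on rank %d\n",rmine[j],rremote[j],ranks[r]);CHKERRQ(ierr);
     }
   }
.ve
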
871 .seealso: PetscSFGetLeafRanks()
872 @*/
873 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
874 {
875   PetscErrorCode ierr;
876 
877   PetscFunctionBegin;
878   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
879   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
880   if (sf->ops->GetRootRanks) {
881     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
882   } else {
883     /* The generic implementation */
884     if (nranks)  *nranks  = sf->nranks;
885     if (ranks)   *ranks   = sf->ranks;
886     if (roffset) *roffset = sf->roffset;
887     if (rmine)   *rmine   = sf->rmine;
888     if (rremote) *rremote = sf->rremote;
889   }
890   PetscFunctionReturn(0);
891 }
892 
893 /*@C
894    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
895 
896    Not Collective
897 
898    Input Arguments:
899 .  sf - star forest
900 
901    Output Arguments:
902 +  niranks - number of leaf ranks referencing roots on this process
903 .  iranks - array of ranks
904 .  ioffset - offset in irootloc for each rank (length niranks+1)
905 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
906 
907    Level: developer
908 
909 .seealso: PetscSFGetRootRanks()
910 @*/
911 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
912 {
913   PetscErrorCode ierr;
914 
915   PetscFunctionBegin;
916   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
917   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
918   if (sf->ops->GetLeafRanks) {
919     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
920   } else {
921     PetscSFType type;
922     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
923     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
924   }
925   PetscFunctionReturn(0);
926 }
927 
928 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
929   PetscInt i;
930   for (i=0; i<n; i++) {
931     if (needle == list[i]) return PETSC_TRUE;
932   }
933   return PETSC_FALSE;
934 }
935 
936 /*@C
937    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
938 
939    Collective
940 
941    Input Arguments:
942 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
943 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
944 
945    Level: developer
946 
947 .seealso: PetscSFGetRootRanks()
948 @*/
949 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
950 {
951   PetscErrorCode     ierr;
952   PetscTable         table;
953   PetscTablePosition pos;
954   PetscMPIInt        size,groupsize,*groupranks;
955   PetscInt           *rcount,*ranks;
956   PetscInt           i, irank = -1,orank = -1;
957 
958   PetscFunctionBegin;
959   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
960   PetscSFCheckGraphSet(sf,1);
961   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
962   ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
963   for (i=0; i<sf->nleaves; i++) {
964     /* Log 1-based rank */
965     ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
966   }
967   ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
968   ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
969   ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
970   ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
971   for (i=0; i<sf->nranks; i++) {
972     ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
973     ranks[i]--;             /* Convert back to 0-based */
974   }
975   ierr = PetscTableDestroy(&table);CHKERRQ(ierr);
976 
977   /* We expect that dgroup is reliably "small" while nranks could be large */
978   {
979     MPI_Group group = MPI_GROUP_NULL;
980     PetscMPIInt *dgroupranks;
981     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
982     ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
983     ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
984     ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
985     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
986     if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
987     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
988     ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
989   }
990 
991   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
992   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
993     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
994       if (InList(ranks[i],groupsize,groupranks)) break;
995     }
996     for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
997       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
998     }
999     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
1000       PetscInt tmprank,tmpcount;
1001 
1002       tmprank             = ranks[i];
1003       tmpcount            = rcount[i];
1004       ranks[i]            = ranks[sf->ndranks];
1005       rcount[i]           = rcount[sf->ndranks];
1006       ranks[sf->ndranks]  = tmprank;
1007       rcount[sf->ndranks] = tmpcount;
1008       sf->ndranks++;
1009     }
1010   }
1011   ierr = PetscFree(groupranks);CHKERRQ(ierr);
1012   ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
1013   ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
1014   sf->roffset[0] = 0;
1015   for (i=0; i<sf->nranks; i++) {
1016     ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
1017     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1018     rcount[i]        = 0;
1019   }
1020   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1021     /* short circuit */
1022     if (orank != sf->remote[i].rank) {
1023       /* Search for index of iremote[i].rank in sf->ranks */
1024       ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
1025       if (irank < 0) {
1026         ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
1027         if (irank >= 0) irank += sf->ndranks;
1028       }
1029       orank = sf->remote[i].rank;
1030     }
1031     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1032     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1033     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1034     rcount[irank]++;
1035   }
1036   ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 /*@C
1041    PetscSFGetGroups - gets incoming and outgoing process groups
1042 
1043    Collective
1044 
1045    Input Argument:
1046 .  sf - star forest
1047 
1048    Output Arguments:
1049 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1050 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1051 
1052    Level: developer
1053 
1054 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1055 @*/
1056 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1057 {
1058   PetscErrorCode ierr;
1059   MPI_Group      group = MPI_GROUP_NULL;
1060 
1061   PetscFunctionBegin;
1062   if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1063   if (sf->ingroup == MPI_GROUP_NULL) {
1064     PetscInt       i;
1065     const PetscInt *indegree;
1066     PetscMPIInt    rank,*outranks,*inranks;
1067     PetscSFNode    *remote;
1068     PetscSF        bgcount;
1069 
1070     /* Compute the number of incoming ranks */
1071     ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
1072     for (i=0; i<sf->nranks; i++) {
1073       remote[i].rank  = sf->ranks[i];
1074       remote[i].index = 0;
1075     }
1076     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
1077     ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1078     ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
1079     ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
1080     /* Enumerate the incoming ranks */
1081     ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
1082     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1083     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1084     ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1085     ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1086     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1087     ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
1088     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1089     ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
1090     ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
1091   }
1092   *incoming = sf->ingroup;
1093 
1094   if (sf->outgroup == MPI_GROUP_NULL) {
1095     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1096     ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
1097     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1098   }
1099   *outgoing = sf->outgroup;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 /*@
1104    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1105 
1106    Collective
1107 
1108    Input Argument:
1109 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1110 
1111    Output Arguments:
1112 .  multi - star forest with split roots, such that each root has degree exactly 1
1113 
1114    Level: developer
1115 
1116    Notes:
1117 
1118    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1119    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1120    edge, it is a candidate for future optimization that might involve its removal.
1121 
1122 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1123 @*/
1124 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1125 {
1126   PetscErrorCode ierr;
1127 
1128   PetscFunctionBegin;
1129   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1130   PetscValidPointer(multi,2);
1131   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1132     ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1133     *multi = sf->multi;
1134     PetscFunctionReturn(0);
1135   }
1136   if (!sf->multi) {
1137     const PetscInt *indegree;
1138     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1139     PetscSFNode    *remote;
1140     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1141     ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
1142     ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
1143     ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
1144     inoffset[0] = 0;
1145     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1146     for (i=0; i<maxlocal; i++) outones[i] = 1;
1147     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1148     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1149     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1150     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1151       for (i=0; i<sf->nroots; i++) {
1152         if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1153       }
1154     }
1155     ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
1156     for (i=0; i<sf->nleaves; i++) {
1157       remote[i].rank  = sf->remote[i].rank;
1158       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1159     }
1160     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1161     ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1162     if (sf->rankorder) {        /* Sort the ranks */
1163       PetscMPIInt rank;
1164       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1165       PetscSFNode *newremote;
1166       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1167       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1168       ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
1169       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1170       ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1171       ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1172       /* Sort the incoming ranks at each vertex, build the inverse map */
1173       for (i=0; i<sf->nroots; i++) {
1174         PetscInt j;
1175         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1176         ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
1177         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1178       }
1179       ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1180       ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1181       ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
1182       for (i=0; i<sf->nleaves; i++) {
1183         newremote[i].rank  = sf->remote[i].rank;
1184         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1185       }
1186       ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1187       ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
1188     }
1189     ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
1190   }
1191   *multi = sf->multi;
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 /*@C
1196    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1197 
1198    Collective
1199 
1200    Input Arguments:
1201 +  sf - original star forest
1202 .  nselected  - number of selected roots on this process
1203 -  selected   - indices of the selected roots on this process
1204 
1205    Output Arguments:
1206 .  esf - new star forest
1207 
1208    Level: advanced
1209 
1210    Note:
1211    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1212    be done by calling PetscSFGetGraph().
1213 
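   Example Usage:
   A minimal sketch (the selection and names are illustrative; sf and ierr are assumed in scope) that keeps only the
   edges attached to root 0 of each process:
.vb
   PetscInt selected[1] = {0};
   PetscSF  esf;
   ierr = PetscSFCreateEmbeddedSF(sf,1,selected,&esf);CHKERRQ(ierr);
   /* ... communicate on esf; only leaves of the selected roots participate ... */
   ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
.ve
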
1214 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1215 @*/
1216 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1217 {
1218   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1219   const PetscInt    *ilocal;
1220   signed char       *rootdata,*leafdata,*leafmem;
1221   const PetscSFNode *iremote;
1222   PetscSFNode       *new_iremote;
1223   MPI_Comm          comm;
1224   PetscErrorCode    ierr;
1225 
1226   PetscFunctionBegin;
1227   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1228   PetscSFCheckGraphSet(sf,1);
1229   if (nselected) PetscValidPointer(selected,3);
1230   PetscValidPointer(esf,4);
1231 
1232   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1233 
1234   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1235   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1236   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1237 
1238   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or out-of-range indices */
1239 
1240     PetscBool dups;
1241     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1242     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1243     for (i=0; i<nselected; i++)
1244       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1245   }
1246 
1247   if (sf->ops->CreateEmbeddedSF) {
1248     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1249   } else {
1250     /* A generic version of creating embedded sf */
1251     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1252     maxlocal = maxleaf - minleaf + 1;
1253     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1254     leafdata = leafmem - minleaf;
1255     /* Tag selected roots and bcast to leaves */
1256     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1257     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1258     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1259 
1260     /* Build esf with leaves that are still connected */
1261     esf_nleaves = 0;
1262     for (i=0; i<nleaves; i++) {
1263       j = ilocal ? ilocal[i] : i;
1264       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1265          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1266       */
1267       esf_nleaves += (leafdata[j] ? 1 : 0);
1268     }
1269     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1270     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1271     for (i=n=0; i<nleaves; i++) {
1272       j = ilocal ? ilocal[i] : i;
1273       if (leafdata[j]) {
1274         new_ilocal[n]        = j;
1275         new_iremote[n].rank  = iremote[i].rank;
1276         new_iremote[n].index = iremote[i].index;
1277         ++n;
1278       }
1279     }
1280     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1281     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1282     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1283     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1284   }
1285   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /*@C
1290   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1291 
1292   Collective
1293 
1294   Input Arguments:
1295 + sf - original star forest
1296 . nselected  - number of selected leaves on this process
1297 - selected   - indices of the selected leaves on this process
1298 
1299   Output Arguments:
1300 .  newsf - new star forest
1301 
1302   Level: advanced
1303 
1304 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1305 @*/
1306 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1307 {
1308   const PetscSFNode *iremote;
1309   PetscSFNode       *new_iremote;
1310   const PetscInt    *ilocal;
1311   PetscInt          i,nroots,*leaves,*new_ilocal;
1312   MPI_Comm          comm;
1313   PetscErrorCode    ierr;
1314 
1315   PetscFunctionBegin;
1316   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1317   PetscSFCheckGraphSet(sf,1);
1318   if (nselected) PetscValidPointer(selected,3);
1319   PetscValidPointer(newsf,4);
1320 
1321   /* Sort selected[] and remove duplicates, putting the result in leaves[] */
1322   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1323   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1324   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1325   ierr = PetscSortRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1326   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1327 
1328   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1329   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1330     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1331   } else {
1332     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1333     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1334     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1335     for (i=0; i<nselected; ++i) {
1336       const PetscInt l     = leaves[i];
1337       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1338       new_iremote[i].rank  = iremote[l].rank;
1339       new_iremote[i].index = iremote[l].index;
1340     }
1341     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1342     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1343   }
1344   ierr = PetscFree(leaves);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
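/* A minimal usage sketch (not part of the library; "sf" and the leaf indices are
   hypothetical): keep only the edges ending at leaves 0 and 3 without remapping
   leaf indices, so the embedded SF can use the same leaf buffers as sf.

     const PetscInt selected[2] = {0,3};
     PetscSF        esf;
     ierr = PetscSFCreateEmbeddedLeafSF(sf,2,selected,&esf);CHKERRQ(ierr);
     ... communicate with esf using the original root/leaf buffers ...
     ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
*/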
1347 
1348 /*@C
1349    PetscSFBcastAndOpBegin - begin a pointwise broadcast in which each root value is combined into the leaf values with a reduction operation, to be concluded with a call to PetscSFBcastAndOpEnd()
1350 
1351    Collective on PetscSF
1352 
1353    Input Arguments:
1354 +  sf - star forest on which to communicate
1355 .  unit - data type associated with each node
1356 .  rootdata - buffer to broadcast
1357 -  op - operation to use for reduction
1358 
1359    Output Arguments:
1360 .  leafdata - buffer to be reduced with values from each leaf's respective root
1361 
1362    Level: intermediate
1363 
1364 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1365 @*/
1366 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1367 {
1368   PetscErrorCode ierr;
1369   PetscMemType   rootmtype,leafmtype;
1370 
1371   PetscFunctionBegin;
1372   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1373   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1374   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1375   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1376   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1377   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1378   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 /*@C
1383    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1384 
1385    Collective
1386 
1387    Input Arguments:
1388 +  sf - star forest
1389 .  unit - data type
1390 .  rootdata - buffer to broadcast
1391 -  op - operation to use for reduction
1392 
1393    Output Arguments:
1394 .  leafdata - buffer to be reduced with values from each leaf's respective root
1395 
1396    Level: intermediate
1397 
1398 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1399 @*/
1400 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1401 {
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1406   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1407   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1408   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1409   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
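/* A minimal usage sketch (not part of the library; "sf", "nroots", "nleaves" and
   contiguous leaf numbering (ilocal == NULL) are assumed): add each root value
   into every leaf that references it, overlapping communication with other work.

     PetscInt *rootdata,*leafdata;
     ierr = PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
     ... fill rootdata and leafdata ...
     ierr = PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
     ... independent work may overlap the communication here ...
     ierr = PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
*/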
1412 
1413 /*@C
1414    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with a call to PetscSFReduceEnd()
1415 
1416    Collective
1417 
1418    Input Arguments:
1419 +  sf - star forest
1420 .  unit - data type
1421 .  leafdata - values to reduce
1422 -  op - reduction operation
1423 
1424    Output Arguments:
1425 .  rootdata - result of reduction of values from all leaves of each root
1426 
1427    Level: intermediate
1428 
1429 .seealso: PetscSFBcastBegin()
1430 @*/
1431 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1432 {
1433   PetscErrorCode ierr;
1434   PetscMemType   rootmtype,leafmtype;
1435 
1436   PetscFunctionBegin;
1437   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1438   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1439   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1440   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1441   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1442   ierr = (*sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1443   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 /*@C
1448    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1449 
1450    Collective
1451 
1452    Input Arguments:
1453 +  sf - star forest
1454 .  unit - data type
1455 .  leafdata - values to reduce
1456 -  op - reduction operation
1457 
1458    Output Arguments:
1459 .  rootdata - result of reduction of values from all leaves of each root
1460 
1461    Level: intermediate
1462 
1463 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1464 @*/
1465 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1466 {
1467   PetscErrorCode ierr;
1468 
1469   PetscFunctionBegin;
1470   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1471   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1472   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1473   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1474   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
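/* A minimal usage sketch (not part of the library; "sf", "nroots", "nleaves" and
   contiguous leaf numbering are assumed): sum the leaf values into their roots.
   The reduction combines incoming values with the existing rootdata, so zero it
   first if only the sum of the leaves is wanted.

     PetscScalar *rootdata,*leafdata;
     ierr = PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
     ierr = PetscArrayzero(rootdata,nroots);CHKERRQ(ierr);
     ... fill leafdata ...
     ierr = PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
*/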
1477 
1478 /*@C
1479    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1480 
1481    Collective
1482 
1483    Input Arguments:
1484 +  sf - star forest
1485 .  unit - data type
1486 .  leafdata - leaf values to use in reduction
1487 -  op - operation to use for reduction
1488 
1489    Output Arguments:
1490 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1491 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1492 
1493    Level: advanced
1494 
1495    Note:
1496    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1497    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1498    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such
1499    as integers.
1500 
1501 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1502 @*/
1503 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1504 {
1505   PetscErrorCode ierr;
1506   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1507 
1508   PetscFunctionBegin;
1509   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1510   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1511   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1512   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1513   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1514   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1515   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1516   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1517   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 /*@C
1522    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1523 
1524    Collective
1525 
1526    Input Arguments:
1527 +  sf - star forest
1528 .  unit - data type
1529 .  leafdata - leaf values to use in reduction
1530 -  op - operation to use for reduction
1531 
1532    Output Arguments:
1533 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1534 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1535 
1536    Level: advanced
1537 
1538 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1539 @*/
1540 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1541 {
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1546   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1547   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1548   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1549   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
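/* A minimal usage sketch (not part of the library; "sf", "nroots", "nleaves" and
   contiguous leaf numbering are assumed): a fetch-and-add in which each leaf
   contributes 1 and leafupdate[i] receives the root value seen immediately
   before this process's contribution, e.g. to hand out unique slots per root.

     PetscInt i,*rootdata,*leafdata,*leafupdate;
     ierr = PetscCalloc3(nroots,&rootdata,nleaves,&leafdata,nleaves,&leafupdate);CHKERRQ(ierr);
     for (i=0; i<nleaves; i++) leafdata[i] = 1;
     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscFree3(rootdata,leafdata,leafupdate);CHKERRQ(ierr);
*/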
1552 
1553 /*@C
1554    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1555 
1556    Collective
1557 
1558    Input Arguments:
1559 .  sf - star forest
1560 
1561    Output Arguments:
1562 .  degree - degree of each root vertex
1563 
1564    Level: advanced
1565 
1566    Notes:
1567    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(), so there is no need to call PetscFree() on it.
1568 
1569 .seealso: PetscSFGatherBegin()
1570 @*/
1571 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1572 {
1573   PetscErrorCode ierr;
1574 
1575   PetscFunctionBegin;
1576   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1577   PetscSFCheckGraphSet(sf,1);
1578   PetscValidPointer(degree,2);
1579   if (!sf->degreeknown) {
1580     PetscInt i, nroots = sf->nroots, maxlocal;
1581     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1582     maxlocal = sf->maxleaf-sf->minleaf+1;
1583     ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
1584     ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1585     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1586     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1587     ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
1588   }
1589   *degree = NULL;
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 /*@C
1594    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1595 
1596    Collective
1597 
1598    Input Arguments:
1599 .  sf - star forest
1600 
1601    Output Arguments:
1602 .  degree - degree of each root vertex
1603 
1604    Level: developer
1605 
1606    Notes:
1607    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(), so there is no need to call PetscFree() on it.
1608 
1609 .seealso: PetscSFComputeDegreeBegin()
1610 @*/
1611 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1612 {
1613   PetscErrorCode ierr;
1614 
1615   PetscFunctionBegin;
1616   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1617   PetscSFCheckGraphSet(sf,1);
1618   PetscValidPointer(degree,2);
1619   if (!sf->degreeknown) {
1620     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1621     ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
1622     ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
1623     sf->degreeknown = PETSC_TRUE;
1624   }
1625   *degree = sf->degree;
1626   PetscFunctionReturn(0);
1627 }
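/* A minimal usage sketch (not part of the library; "sf" and "nroots" are assumed):
   obtain the number of leaves attached to each local root.

     const PetscInt *degree;
     PetscInt        i;
     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     for (i=0; i<nroots; i++) { ... degree[i] leaves reference root i ... }
     The array degree is owned by sf and must not be freed by the caller.
*/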
1628 
1629 
1630 /*@C
1631    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1632    Each multi-root is assigned the index of its corresponding original root.
1633 
1634    Collective
1635 
1636    Input Arguments:
1637 +  sf - star forest
1638 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1639 
1640    Output Arguments:
1641 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1642 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1643 
1644    Level: developer
1645 
1646    Notes:
1647    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1648 
1649 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1650 @*/
1651 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1652 {
1653   PetscSF             msf;
1654   PetscInt            i, j, k, nroots, nmroots;
1655   PetscErrorCode      ierr;
1656 
1657   PetscFunctionBegin;
1658   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1659   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1660   if (nroots) PetscValidIntPointer(degree,2);
1661   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1662   PetscValidPointer(multiRootsOrigNumbering,4);
1663   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1664   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1665   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1666   for (i=0,j=0,k=0; i<nroots; i++) {
1667     if (!degree[i]) continue;
1668     for (j=0; j<degree[i]; j++,k++) {
1669       (*multiRootsOrigNumbering)[k] = i;
1670     }
1671   }
1672   if (PetscUnlikely(k != nmroots)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sanity check failed: computed %D multi-roots but the multi-SF has %D roots",k,nmroots);
1673   if (nMultiRoots) *nMultiRoots = nmroots;
1674   PetscFunctionReturn(0);
1675 }
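/* A minimal usage sketch (not part of the library; "sf" is assumed): map each
   multi-root back to the original root it was expanded from.

     const PetscInt *degree;
     PetscInt        nmr,*morig;
     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmr,&morig);CHKERRQ(ierr);
     ... morig[k] is the original root index of multi-root k, for k in [0,nmr) ...
     ierr = PetscFree(morig);CHKERRQ(ierr);
*/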
1676 
1677 /*@C
1678    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1679 
1680    Collective
1681 
1682    Input Arguments:
1683 +  sf - star forest
1684 .  unit - data type
1685 -  leafdata - leaf data to gather to roots
1686 
1687    Output Argument:
1688 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1689 
1690    Level: intermediate
1691 
1692 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1693 @*/
1694 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1695 {
1696   PetscErrorCode ierr;
1697   PetscSF        multi = NULL;
1698 
1699   PetscFunctionBegin;
1700   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1701   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1702   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1703   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 /*@C
1708    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1709 
1710    Collective
1711 
1712    Input Arguments:
1713 +  sf - star forest
1714 .  unit - data type
1715 -  leafdata - leaf data to gather to roots
1716 
1717    Output Argument:
1718 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1719 
1720    Level: intermediate
1721 
1722 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1723 @*/
1724 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1725 {
1726   PetscErrorCode ierr;
1727   PetscSF        multi = NULL;
1728 
1729   PetscFunctionBegin;
1730   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1731   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1732   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1733   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1734   PetscFunctionReturn(0);
1735 }
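/* A minimal usage sketch (not part of the library; "sf", "nroots", "leafdata" and
   the "degree" array from PetscSFComputeDegreeBegin()/End() are assumed): gather
   every leaf value next to its root. The multi-root buffer needs one slot per
   incoming leaf, i.e. the sum of the root degrees.

     PetscInt     i,nmulti = 0;
     PetscScalar *multirootdata;
     for (i=0; i<nroots; i++) nmulti += degree[i];
     ierr = PetscMalloc1(nmulti,&multirootdata);CHKERRQ(ierr);
     ierr = PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);CHKERRQ(ierr);
     ierr = PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);CHKERRQ(ierr);
*/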
1736 
1737 /*@C
1738    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1739 
1740    Collective
1741 
1742    Input Arguments:
1743 +  sf - star forest
1744 .  unit - data type
1745 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1746 
1747    Output Argument:
1748 .  leafdata - leaf data to be updated with personal data from each respective root
1749 
1750    Level: intermediate
1751 
1752 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd(), PetscSFGatherBegin()
1753 @*/
1754 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1755 {
1756   PetscErrorCode ierr;
1757   PetscSF        multi = NULL;
1758 
1759   PetscFunctionBegin;
1760   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1761   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1762   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1763   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 /*@C
1768    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1769 
1770    Collective
1771 
1772    Input Arguments:
1773 +  sf - star forest
1774 .  unit - data type
1775 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1776 
1777    Output Argument:
1778 .  leafdata - leaf data to be updated with personal data from each respective root
1779 
1780    Level: intermediate
1781 
1782 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin(), PetscSFGatherEnd()
1783 @*/
1784 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1785 {
1786   PetscErrorCode ierr;
1787   PetscSF        multi = NULL;
1788 
1789   PetscFunctionBegin;
1790   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1791   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1792   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1793   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
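/* A minimal usage sketch (not part of the library; "sf", "leafdata" and a
   "multirootdata" buffer laid out as in the PetscSFGatherBegin() sketch above
   are assumed): send one personal unit from each multi-root slot back to the
   corresponding leaf.

     ierr = PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);CHKERRQ(ierr);
*/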
1796 
1797 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1798 {
1799   PetscInt        i, n, nleaves;
1800   const PetscInt *ilocal = NULL;
1801   PetscHSetI      seen;
1802   PetscErrorCode  ierr;
1803 
1804   PetscFunctionBegin;
1805   if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
1806   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1807   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1808   for (i = 0; i < nleaves; i++) {
1809     const PetscInt leaf = ilocal ? ilocal[i] : i;
1810     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1811   }
1812   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1813   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1814   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 /*@
1819   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1820 
1821   Input Parameters:
1822 + sfA - The first PetscSF
1823 - sfB - The second PetscSF
1824 
1825   Output Parameters:
1826 . sfBA - The composite SF
1827 
1828   Level: developer
1829 
1830   Notes:
1831   Currently, the two SFs must be defined on congruent communicators and they must be true star
1832   forests, i.e. the same leaf is not connected with different roots.
1833 
1834   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1835   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1836   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1837   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1838 
1839 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1840 @*/
1841 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1842 {
1843   PetscErrorCode    ierr;
1844   const PetscSFNode *remotePointsA,*remotePointsB;
1845   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1846   const PetscInt    *localPointsA,*localPointsB;
1847   PetscInt          *localPointsBA;
1848   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1849   PetscBool         denseB;
1850 
1851   PetscFunctionBegin;
1852   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1853   PetscSFCheckGraphSet(sfA,1);
1854   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1855   PetscSFCheckGraphSet(sfB,2);
1856   PetscCheckSameComm(sfA,1,sfB,2);
1857   PetscValidPointer(sfBA,3);
1858   ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
1859   ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
1860 
1861   ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
1862   ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
1863   if (localPointsA) {
1864     ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
1865     for (i=0; i<numRootsB; i++) {
1866       reorderedRemotePointsA[i].rank = -1;
1867       reorderedRemotePointsA[i].index = -1;
1868     }
1869     for (i=0; i<numLeavesA; i++) {
1870       if (localPointsA[i] >= numRootsB) continue;
1871       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1872     }
1873     remotePointsA = reorderedRemotePointsA;
1874   }
1875   ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
1876   ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
1877   ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1878   ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1879   ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
1880 
1881   denseB = (PetscBool)!localPointsB;
1882   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1883     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1884     else numLeavesBA++;
1885   }
1886   if (denseB) {
1887     localPointsBA  = NULL;
1888     remotePointsBA = leafdataB;
1889   } else {
1890     ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
1891     ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
1892     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1893       const PetscInt l = localPointsB ? localPointsB[i] : i;
1894 
1895       if (leafdataB[l-minleaf].rank == -1) continue;
1896       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1897       localPointsBA[numLeavesBA] = l;
1898       numLeavesBA++;
1899     }
1900     ierr = PetscFree(leafdataB);CHKERRQ(ierr);
1901   }
1902   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
1903   ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
1904   PetscFunctionReturn(0);
1905 }
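/* A minimal usage sketch (not part of the library; "sfA" and "sfB" are assumed to
   be SFs with graphs set on congruent communicators, sfA's leaf space feeding
   sfB's root space): the composite sfBA broadcasts from sfA's roots directly to
   sfB's leaves.

     PetscSF sfBA;
     ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr);
     ... PetscSFBcastBegin/End on sfBA is equivalent to Bcast on sfA then on sfB ...
     ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
*/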
1906 
1907 /*@
1908   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
1909 
1910   Input Parameters:
1911 + sfA - The first PetscSF
1912 - sfB - The second PetscSF
1913 
1914   Output Parameters:
1915 . sfBA - The composite SF.
1916 
1917   Level: developer
1918 
1919   Notes:
1920   Currently, the two SFs must be defined on congruent communicators and they must be true star
1921   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
1922   second SF must have a degree of 1, i.e., no root has more than one leaf connected.
1923 
1924   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
1925   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
1926   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
1927   a Reduce on sfB, on connected roots.
1928 
1929 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
1930 @*/
1931 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1932 {
1933   PetscErrorCode    ierr;
1934   const PetscSFNode *remotePointsA,*remotePointsB;
1935   PetscSFNode       *remotePointsBA;
1936   const PetscInt    *localPointsA,*localPointsB;
1937   PetscSFNode       *reorderedRemotePointsA = NULL;
1938   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
1939   MPI_Op            op;
1940 #if defined(PETSC_USE_64BIT_INDICES)
1941   PetscBool         iswin;
1942 #endif
1943 
1944   PetscFunctionBegin;
1945   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1946   PetscSFCheckGraphSet(sfA,1);
1947   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1948   PetscSFCheckGraphSet(sfB,2);
1949   PetscCheckSameComm(sfA,1,sfB,2);
1950   PetscValidPointer(sfBA,3);
1951   ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
1952   ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
1953 
1954   ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
1955   ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
1956 
1957   /* TODO: Check roots of sfB have degree of 1 */
1958   /* Once that check is implemented, we can replace MPI_MAXLOC
1959      with MPIU_REPLACE; when every root of sfB has degree one, the two have the same effect.
1960      We use MPI_MAXLOC only to obtain a deterministic output from this routine when
1961      the root condition is not met.
1962    */
1963   op = MPI_MAXLOC;
1964 #if defined(PETSC_USE_64BIT_INDICES)
1965   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
1966   ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
1967   if (iswin) op = MPIU_REPLACE;
1968 #endif
1969 
1970   ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
1971   ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
1972   for (i=0; i<maxleaf - minleaf + 1; i++) {
1973     reorderedRemotePointsA[i].rank = -1;
1974     reorderedRemotePointsA[i].index = -1;
1975   }
1976   if (localPointsA) {
1977     for (i=0; i<numLeavesA; i++) {
1978       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
1979       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
1980     }
1981   } else {
1982     for (i=0; i<numLeavesA; i++) {
1983       if (i > maxleaf || i < minleaf) continue;
1984       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
1985     }
1986   }
1987 
1988   ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
1989   ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
1990   for (i=0; i<numRootsB; i++) {
1991     remotePointsBA[i].rank = -1;
1992     remotePointsBA[i].index = -1;
1993   }
1994 
1995   ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
1996   ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
1997   ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
1998   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
1999     if (remotePointsBA[i].rank == -1) continue;
2000     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2001     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2002     localPointsBA[numLeavesBA] = i;
2003     numLeavesBA++;
2004   }
2005   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
2006   ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
2007   PetscFunctionReturn(0);
2008 }
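/* A minimal usage sketch (not part of the library; "sfA" and "sfB" are assumed as
   above, with every root of sfB having at most one leaf): the composite sfBA maps
   sfA's roots to sfB's roots.

     PetscSF sfBA;
     ierr = PetscSFComposeInverse(sfA,sfB,&sfBA);CHKERRQ(ierr);
     ... PetscSFBcastBegin/End on sfBA is equivalent to Bcast on sfA then Reduce on sfB ...
     ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
*/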
2009 
2010 /*
2011   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2012 
2013   Input Parameters:
2014 . sf - The global PetscSF
2015 
2016   Output Parameters:
2017 . out - The local PetscSF
2018  */
2019 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2020 {
2021   MPI_Comm           comm;
2022   PetscMPIInt        myrank;
2023   const PetscInt     *ilocal;
2024   const PetscSFNode  *iremote;
2025   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2026   PetscSFNode        *liremote;
2027   PetscSF            lsf;
2028   PetscErrorCode     ierr;
2029 
2030   PetscFunctionBegin;
2031   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2032   if (sf->ops->CreateLocalSF) {
2033     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
2034   } else {
2035     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2036     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2037     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
2038 
2039     /* Find out local edges and build a local SF */
2040     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2041     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2042     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2043     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2044 
2045     for (i=j=0; i<nleaves; i++) {
2046       if (iremote[i].rank == (PetscInt)myrank) {
2047         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2048         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2049         liremote[j].index = iremote[i].index;
2050         j++;
2051       }
2052     }
2053     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2054     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2055     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2056     *out = lsf;
2057   }
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2062 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2063 {
2064   PetscErrorCode     ierr;
2065   PetscMemType       rootmtype,leafmtype;
2066 
2067   PetscFunctionBegin;
2068   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2069   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2070   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2071   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2072   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2073   if (sf->ops->BcastToZero) {
2074     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2075   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2076   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2077   PetscFunctionReturn(0);
2078 }
2079 
2080