xref: /petsc/src/vec/is/sf/interface/sf.c (revision 29046d5344efe75540424556535089496c551ef5)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petscctable.h>
3 
4 #if defined(PETSC_USE_DEBUG)
5 #  define PetscSFCheckGraphSet(sf,arg) do {                          \
6     if (PetscUnlikely(!(sf)->graphset))                              \
7       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
8   } while (0)
9 #else
10 #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
11 #endif
12 
13 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
14 
15 /*@
16    PetscSFCreate - create a star forest communication context
17 
18    Not Collective
19 
20    Input Arguments:
21 .  comm - communicator on which the star forest will operate
22 
23    Output Arguments:
24 .  sf - new star forest context
25 
26    Level: intermediate
27 
28 .seealso: PetscSFSetGraph(), PetscSFDestroy()
29 @*/
30 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
31 {
32   PetscErrorCode ierr;
33   PetscSF        b;
34 
35   PetscFunctionBegin;
36   PetscValidPointer(sf,2);
37   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
38 
39   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
40 
41   b->nroots    = -1;
42   b->nleaves   = -1;
43   b->minleaf   = PETSC_MAX_INT;
44   b->maxleaf   = PETSC_MIN_INT;
45   b->nranks    = -1;
46   b->rankorder = PETSC_TRUE;
47   b->ingroup   = MPI_GROUP_NULL;
48   b->outgroup  = MPI_GROUP_NULL;
49   b->graphset  = PETSC_FALSE;
50 
51   *sf = b;
52   PetscFunctionReturn(0);
53 }
54 
55 /*@
56    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
57 
58    Collective
59 
60    Input Arguments:
61 .  sf - star forest
62 
63    Level: advanced
64 
65 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
66 @*/
67 PetscErrorCode PetscSFReset(PetscSF sf)
68 {
69   PetscErrorCode ierr;
70 
71   PetscFunctionBegin;
72   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
73   if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
74   sf->nroots   = -1;
75   sf->nleaves  = -1;
76   sf->minleaf  = PETSC_MAX_INT;
77   sf->maxleaf  = PETSC_MIN_INT;
78   sf->mine     = NULL;
79   sf->remote   = NULL;
80   sf->graphset = PETSC_FALSE;
81   ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
82   ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
83   sf->nranks = -1;
84   ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
85   sf->degreeknown = PETSC_FALSE;
86   ierr = PetscFree(sf->degree);CHKERRQ(ierr);
87   if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
88   if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
89   ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
90   sf->setupcalled = PETSC_FALSE;
91   PetscFunctionReturn(0);
92 }
93 
94 /*@C
95    PetscSFSetType - Set the PetscSF communication implementation
96 
97    Collective on PetscSF
98 
99    Input Parameters:
100 +  sf - the PetscSF context
101 -  type - a known method
102 
103    Options Database Key:
104 .  -sf_type <type> - Sets the method; use -help for a list
105    of available methods (for instance, window, pt2pt, neighbor)
106 
107    Notes:
108    See "include/petscsf.h" for available methods (for instance)
109 +    PETSCSFWINDOW - MPI-2/3 one-sided
110 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
111 
112   Level: intermediate
113 
114 .keywords: PetscSF, set, type
115 
116 .seealso: PetscSFType, PetscSFCreate()
117 @*/
118 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
119 {
120   PetscErrorCode ierr,(*r)(PetscSF);
121   PetscBool      match;
122 
123   PetscFunctionBegin;
124   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
125   PetscValidCharPointer(type,2);
126 
127   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
128   if (match) PetscFunctionReturn(0);
129 
130   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
131   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
132   /* Destroy the previous PetscSF implementation context */
133   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
134   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
135   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
136   ierr = (*r)(sf);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 /*@C
141   PetscSFGetType - Get the PetscSF communication implementation
142 
143   Not Collective
144 
145   Input Parameter:
146 . sf  - the PetscSF context
147 
148   Output Parameter:
149 . type - the PetscSF type name
150 
151   Level: intermediate
152 
153 .keywords: PetscSF, get, type
154 .seealso: PetscSFSetType(), PetscSFCreate()
155 @*/
156 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
157 {
158   PetscFunctionBegin;
159   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
160   PetscValidPointer(type,2);
161   *type = ((PetscObject)sf)->type_name;
162   PetscFunctionReturn(0);
163 }
164 
165 /*@
166    PetscSFDestroy - destroy star forest
167 
168    Collective
169 
170    Input Arguments:
171 .  sf - address of star forest
172 
173    Level: intermediate
174 
175 .seealso: PetscSFCreate(), PetscSFReset()
176 @*/
177 PetscErrorCode PetscSFDestroy(PetscSF *sf)
178 {
179   PetscErrorCode ierr;
180 
181   PetscFunctionBegin;
182   if (!*sf) PetscFunctionReturn(0);
183   PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
184   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
185   ierr = PetscSFReset(*sf);CHKERRQ(ierr);
186   if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
187   ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
188   PetscFunctionReturn(0);
189 }
190 
191 /*@
192    PetscSFSetUp - set up communication structures
193 
194    Collective
195 
196    Input Arguments:
197 .  sf - star forest communication object
198 
199    Level: beginner
200 
201 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
202 @*/
203 PetscErrorCode PetscSFSetUp(PetscSF sf)
204 {
205   PetscErrorCode ierr;
206 
207   PetscFunctionBegin;
208   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
209   PetscSFCheckGraphSet(sf,1);
210   if (sf->setupcalled) PetscFunctionReturn(0);
211   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
212   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
213   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
214   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
215   sf->setupcalled = PETSC_TRUE;
216   PetscFunctionReturn(0);
217 }
218 
219 /*@
220    PetscSFSetFromOptions - set PetscSF options using the options database
221 
222    Logically Collective
223 
224    Input Arguments:
225 .  sf - star forest
226 
227    Options Database Keys:
228 +  -sf_type - implementation type, see PetscSFSetType()
229 -  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
230 
231    Level: intermediate
232 
233 .keywords: KSP, set, from, options, database
234 
235 .seealso: PetscSFWindowSetSyncType()
236 @*/
237 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
238 {
239   PetscSFType    deft;
240   char           type[256];
241   PetscErrorCode ierr;
242   PetscBool      flg;
243 
244   PetscFunctionBegin;
245   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
246   ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
247   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
248   ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
249   ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
250   ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
251   if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
252   ierr = PetscOptionsEnd();CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 /*@
257    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
258 
259    Logically Collective
260 
261    Input Arguments:
262 +  sf - star forest
263 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
264 
265    Level: advanced
266 
267 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
268 @*/
269 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
270 {
271 
272   PetscFunctionBegin;
273   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
274   PetscValidLogicalCollectiveBool(sf,flg,2);
275   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
276   sf->rankorder = flg;
277   PetscFunctionReturn(0);
278 }
279 
280 /*@
281    PetscSFSetGraph - Set a parallel star forest
282 
283    Collective
284 
285    Input Arguments:
286 +  sf - star forest
287 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
288 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
289 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
290 .  localmode - copy mode for ilocal
291 .  iremote - remote locations of root vertices for each leaf on the current process
292 -  remotemode - copy mode for iremote
293 
294    Level: intermediate
295 
296    Notes: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
297 
298 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
299 @*/
300 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
301 {
302   PetscErrorCode ierr;
303 
304   PetscFunctionBegin;
305   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
306   if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
307   if (nleaves > 0) PetscValidPointer(iremote,6);
308   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
309   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
310 
311   ierr = PetscSFReset(sf);CHKERRQ(ierr);
312   ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
313 
314   sf->nroots  = nroots;
315   sf->nleaves = nleaves;
316 
317   if (nleaves && ilocal) {
318     PetscInt i;
319     PetscInt minleaf = PETSC_MAX_INT;
320     PetscInt maxleaf = PETSC_MIN_INT;
321     for (i=0; i<nleaves; i++) {
322       minleaf = PetscMin(minleaf,ilocal[i]);
323       maxleaf = PetscMax(maxleaf,ilocal[i]);
324     }
325     sf->minleaf = minleaf;
326     sf->maxleaf = maxleaf;
327   } else {
328     sf->minleaf = 0;
329     sf->maxleaf = nleaves - 1;
330   }
331 
332   if (ilocal) {
333     switch (localmode) {
334     case PETSC_COPY_VALUES:
335       ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
336       ierr = PetscMemcpy(sf->mine_alloc,ilocal,nleaves*sizeof(*ilocal));CHKERRQ(ierr);
337       sf->mine = sf->mine_alloc;
338       break;
339     case PETSC_OWN_POINTER:
340       sf->mine_alloc = (PetscInt*)ilocal;
341       sf->mine       = sf->mine_alloc;
342       break;
343     case PETSC_USE_POINTER:
344       sf->mine_alloc = NULL;
345       sf->mine       = (PetscInt*)ilocal;
346       break;
347     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
348     }
349   }
350 
351   switch (remotemode) {
352   case PETSC_COPY_VALUES:
353     ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
354     ierr = PetscMemcpy(sf->remote_alloc,iremote,nleaves*sizeof(*iremote));CHKERRQ(ierr);
355     sf->remote = sf->remote_alloc;
356     break;
357   case PETSC_OWN_POINTER:
358     sf->remote_alloc = (PetscSFNode*)iremote;
359     sf->remote       = sf->remote_alloc;
360     break;
361   case PETSC_USE_POINTER:
362     sf->remote_alloc = NULL;
363     sf->remote       = (PetscSFNode*)iremote;
364     break;
365   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
366   }
367 
368   ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
369   sf->graphset = PETSC_TRUE;
370   PetscFunctionReturn(0);
371 }
372 
373 /*@
374    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
375 
376    Collective
377 
378    Input Arguments:
379 .  sf - star forest to invert
380 
381    Output Arguments:
382 .  isf - inverse of sf
383 
384    Level: advanced
385 
386    Notes:
387    All roots must have degree 1.
388 
389    The local space may be a permutation, but cannot be sparse.
390 
391 .seealso: PetscSFSetGraph()
392 @*/
393 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
394 {
395   PetscErrorCode ierr;
396   PetscMPIInt    rank;
397   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
398   const PetscInt *ilocal;
399   PetscSFNode    *roots,*leaves;
400 
401   PetscFunctionBegin;
402   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
403   PetscSFCheckGraphSet(sf,1);
404   PetscValidPointer(isf,2);
405 
406   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
407   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
408 
409   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
410   ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
411   for (i=0; i<maxlocal; i++) {
412     leaves[i].rank  = rank;
413     leaves[i].index = i;
414   }
415   for (i=0; i <nroots; i++) {
416     roots[i].rank  = -1;
417     roots[i].index = -1;
418   }
419   ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
420   ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
421 
422   /* Check whether our leaves are sparse */
423   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
424   if (count == nroots) newilocal = NULL;
425   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
426     ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
427     for (i=0,count=0; i<nroots; i++) {
428       if (roots[i].rank >= 0) {
429         newilocal[count]   = i;
430         roots[count].rank  = roots[i].rank;
431         roots[count].index = roots[i].index;
432         count++;
433       }
434     }
435   }
436 
437   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
438   ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
439   ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 /*@
444    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
445 
446    Collective
447 
448    Input Arguments:
449 +  sf - communication object to duplicate
450 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
451 
452    Output Arguments:
453 .  newsf - new communication object
454 
455    Level: beginner
456 
457 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
458 @*/
459 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
460 {
461   PetscSFType    type;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
466   PetscValidLogicalCollectiveEnum(sf,opt,2);
467   PetscValidPointer(newsf,3);
468   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
469   ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
470   if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
471   if (opt == PETSCSF_DUPLICATE_GRAPH) {
472     PetscInt          nroots,nleaves;
473     const PetscInt    *ilocal;
474     const PetscSFNode *iremote;
475     PetscSFCheckGraphSet(sf,1);
476     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
477     ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
478   }
479   if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
480   PetscFunctionReturn(0);
481 }
482 
483 /*@C
484    PetscSFGetGraph - Get the graph specifying a parallel star forest
485 
486    Not Collective
487 
488    Input Arguments:
489 .  sf - star forest
490 
491    Output Arguments:
492 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
493 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
494 .  ilocal - locations of leaves in leafdata buffers
495 -  iremote - remote locations of root vertices for each leaf on the current process
496 
497    Level: intermediate
498 
499 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
500 @*/
501 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
502 {
503 
504   PetscFunctionBegin;
505   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
506   /* We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set */
507   /* PetscSFCheckGraphSet(sf,1); */
508   if (nroots) *nroots = sf->nroots;
509   if (nleaves) *nleaves = sf->nleaves;
510   if (ilocal) *ilocal = sf->mine;
511   if (iremote) *iremote = sf->remote;
512   PetscFunctionReturn(0);
513 }
514 
515 /*@
516    PetscSFGetLeafRange - Get the active leaf ranges
517 
518    Not Collective
519 
520    Input Arguments:
521 .  sf - star forest
522 
523    Output Arguments:
524 +  minleaf - minimum active leaf on this process
525 -  maxleaf - maximum active leaf on this process
526 
527    Level: developer
528 
529 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
530 @*/
531 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
532 {
533 
534   PetscFunctionBegin;
535   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
536   PetscSFCheckGraphSet(sf,1);
537   if (minleaf) *minleaf = sf->minleaf;
538   if (maxleaf) *maxleaf = sf->maxleaf;
539   PetscFunctionReturn(0);
540 }
541 
542 /*@C
543    PetscSFView - view a star forest
544 
545    Collective
546 
547    Input Arguments:
548 +  sf - star forest
549 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
550 
551    Level: beginner
552 
553 .seealso: PetscSFCreate(), PetscSFSetGraph()
554 @*/
555 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
556 {
557   PetscErrorCode    ierr;
558   PetscBool         iascii;
559   PetscViewerFormat format;
560 
561   PetscFunctionBegin;
562   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
563   if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
564   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
565   PetscCheckSameComm(sf,1,viewer,2);
566   if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
567   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
568   if (iascii) {
569     PetscMPIInt rank;
570     PetscInt    ii,i,j;
571 
572     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
573     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
574     if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
575     if (!sf->graphset) {
576       ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
577       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
578       PetscFunctionReturn(0);
579     }
580     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
581     ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
582     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
583     for (i=0; i<sf->nleaves; i++) {
584       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
585     }
586     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
587     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
588     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
589       PetscMPIInt *tmpranks,*perm;
590       ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
591       ierr = PetscMemcpy(tmpranks,sf->ranks,sf->nranks*sizeof(tmpranks[0]));CHKERRQ(ierr);
592       for (i=0; i<sf->nranks; i++) perm[i] = i;
593       ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
594       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
595       for (ii=0; ii<sf->nranks; ii++) {
596         i = perm[ii];
597         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
598         for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
599           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
600         }
601       }
602       ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
603     }
604     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
605     ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
606     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 /*@C
612    PetscSFGetRanks - Get ranks and number of vertices referenced by leaves on this process
613 
614    Not Collective
615 
616    Input Arguments:
617 .  sf - star forest
618 
619    Output Arguments:
620 +  nranks - number of ranks referenced by local part
621 .  ranks - array of ranks
622 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
623 .  rmine - concatenated array holding local indices referencing each remote rank
624 -  rremote - concatenated array holding remote indices referenced for each remote rank
625 
626    Level: developer
627 
628 .seealso: PetscSFSetGraph()
629 @*/
630 PetscErrorCode PetscSFGetRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
631 {
632 
633   PetscFunctionBegin;
634   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
635   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
636   if (nranks)  *nranks  = sf->nranks;
637   if (ranks)   *ranks   = sf->ranks;
638   if (roffset) *roffset = sf->roffset;
639   if (rmine)   *rmine   = sf->rmine;
640   if (rremote) *rremote = sf->rremote;
641   PetscFunctionReturn(0);
642 }
643 
644 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
645   PetscInt i;
646   for (i=0; i<n; i++) {
647     if (needle == list[i]) return PETSC_TRUE;
648   }
649   return PETSC_FALSE;
650 }
651 
652 /*@C
653    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
654 
655    Collective
656 
657    Input Arguments:
658 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
659 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
660 
661    Level: developer
662 
663 .seealso: PetscSFGetRanks()
664 @*/
665 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
666 {
667   PetscErrorCode     ierr;
668   PetscTable         table;
669   PetscTablePosition pos;
670   PetscMPIInt        size,groupsize,*groupranks;
671   PetscInt           i,*rcount,*ranks;
672 
673   PetscFunctionBegin;
674   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
675   PetscSFCheckGraphSet(sf,1);
676   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
677   ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
678   for (i=0; i<sf->nleaves; i++) {
679     /* Log 1-based rank */
680     ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
681   }
682   ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
683   ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
684   ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
685   ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
686   for (i=0; i<sf->nranks; i++) {
687     ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
688     ranks[i]--;             /* Convert back to 0-based */
689   }
690   ierr = PetscTableDestroy(&table);CHKERRQ(ierr);
691 
692   /* We expect that dgroup is reliably "small" while nranks could be large */
693   {
694     MPI_Group group = MPI_GROUP_NULL;
695     PetscMPIInt *dgroupranks;
696     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
697     ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
698     ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
699     ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
700     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
701     if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
702     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
703     ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
704   }
705 
706   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
707   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
708     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
709       if (InList(ranks[i],groupsize,groupranks)) break;
710     }
711     for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
712       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
713     }
714     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
715       PetscInt    tmprank,tmpcount;
716       tmprank = ranks[i];
717       tmpcount = rcount[i];
718       ranks[i] = ranks[sf->ndranks];
719       rcount[i] = rcount[sf->ndranks];
720       ranks[sf->ndranks] = tmprank;
721       rcount[sf->ndranks] = tmpcount;
722       sf->ndranks++;
723     }
724   }
725   ierr = PetscFree(groupranks);CHKERRQ(ierr);
726   ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
727   ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
728   sf->roffset[0] = 0;
729   for (i=0; i<sf->nranks; i++) {
730     ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
731     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
732     rcount[i]        = 0;
733   }
734   for (i=0; i<sf->nleaves; i++) {
735     PetscInt irank;
736     /* Search for index of iremote[i].rank in sf->ranks */
737     ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
738     if (irank < 0) {
739       ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
740       if (irank >= 0) irank += sf->ndranks;
741     }
742     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
743     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
744     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
745     rcount[irank]++;
746   }
747   ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
751 /*@C
752    PetscSFGetGroups - gets incoming and outgoing process groups
753 
754    Collective
755 
756    Input Argument:
757 .  sf - star forest
758 
759    Output Arguments:
760 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
761 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
762 
763    Level: developer
764 
765 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
766 @*/
767 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
768 {
769   PetscErrorCode ierr;
770   MPI_Group      group = MPI_GROUP_NULL;
771 
772   PetscFunctionBegin;
773   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
774   if (sf->ingroup == MPI_GROUP_NULL) {
775     PetscInt       i;
776     const PetscInt *indegree;
777     PetscMPIInt    rank,*outranks,*inranks;
778     PetscSFNode    *remote;
779     PetscSF        bgcount;
780 
781     /* Compute the number of incoming ranks */
782     ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
783     for (i=0; i<sf->nranks; i++) {
784       remote[i].rank  = sf->ranks[i];
785       remote[i].index = 0;
786     }
787     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
788     ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
789     ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
790     ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
791 
792     /* Enumerate the incoming ranks */
793     ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
794     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
795     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
796     ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
797     ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
798     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
799     ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
800     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
801     ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
802     ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
803   }
804   *incoming = sf->ingroup;
805 
806   if (sf->outgroup == MPI_GROUP_NULL) {
807     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
808     ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
809     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
810   }
811   *outgoing = sf->outgroup;
812   PetscFunctionReturn(0);
813 }
814 
815 /*@
816    PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters
817 
818    Collective
819 
820    Input Argument:
821 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
822 
823    Output Arguments:
824 .  multi - star forest with split roots, such that each root has degree exactly 1
825 
826    Level: developer
827 
828    Notes:
829 
830    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
831    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
832    edge, it is a candidate for future optimization that might involve its removal.
833 
834 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin()
835 @*/
836 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
837 {
838   PetscErrorCode ierr;
839 
840   PetscFunctionBegin;
841   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
842   PetscValidPointer(multi,2);
843   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
844     ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
845     *multi = sf->multi;
846     PetscFunctionReturn(0);
847   }
848   if (!sf->multi) {
849     const PetscInt *indegree;
850     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
851     PetscSFNode    *remote;
852     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
853     ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
854     ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
855     ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
856     inoffset[0] = 0;
857     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
858     for (i=0; i<maxlocal; i++) outones[i] = 1;
859     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
860     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
861     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
862 #if 0
863 #if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
864     for (i=0; i<sf->nroots; i++) {
865       if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
866     }
867 #endif
868 #endif
869     ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
870     for (i=0; i<sf->nleaves; i++) {
871       remote[i].rank  = sf->remote[i].rank;
872       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
873     }
874     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
875     ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
876     if (sf->rankorder) {        /* Sort the ranks */
877       PetscMPIInt rank;
878       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
879       PetscSFNode *newremote;
880       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
881       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
882       ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
883       for (i=0; i<maxlocal; i++) outranks[i] = rank;
884       ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
885       ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
886       /* Sort the incoming ranks at each vertex, build the inverse map */
887       for (i=0; i<sf->nroots; i++) {
888         PetscInt j;
889         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
890         ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
891         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
892       }
893       ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
894       ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
895       ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
896       for (i=0; i<sf->nleaves; i++) {
897         newremote[i].rank  = sf->remote[i].rank;
898         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
899       }
900       ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
901       ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
902     }
903     ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
904   }
905   *multi = sf->multi;
906   PetscFunctionReturn(0);
907 }
908 
909 /*@C
910    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
911 
912    Collective
913 
914    Input Arguments:
915 +  sf - original star forest
916 .  nroots - number of roots to select on this process
917 -  selected - selected roots on this process
918 
919    Output Arguments:
920 .  newsf - new star forest
921 
922    Level: advanced
923 
924    Note:
925    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
926    be done by calling PetscSFGetGraph().
927 
928 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
929 @*/
930 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nroots,const PetscInt *selected,PetscSF *newsf)
931 {
932   PetscInt      *rootdata, *leafdata, *ilocal;
933   PetscSFNode   *iremote;
934   PetscInt       leafsize = 0, nleaves = 0, n, i;
935   PetscErrorCode ierr;
936 
937   PetscFunctionBegin;
938   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
939   PetscSFCheckGraphSet(sf,1);
940   if (nroots) PetscValidPointer(selected,3);
941   PetscValidPointer(newsf,4);
942   if (sf->mine) for (i = 0; i < sf->nleaves; ++i) {leafsize = PetscMax(leafsize, sf->mine[i]+1);}
943   else leafsize = sf->nleaves;
944   ierr = PetscCalloc2(sf->nroots,&rootdata,leafsize,&leafdata);CHKERRQ(ierr);
945   for (i=0; i<nroots; ++i) rootdata[selected[i]] = 1;
946   ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
947   ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
948   for (i = 0; i < leafsize; ++i) nleaves += leafdata[i];
949   ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
950   ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
951   for (i = 0, n = 0; i < sf->nleaves; ++i) {
952     const PetscInt lidx = sf->mine ? sf->mine[i] : i;
953 
954     if (leafdata[lidx]) {
955       ilocal[n]        = lidx;
956       iremote[n].rank  = sf->remote[i].rank;
957       iremote[n].index = sf->remote[i].index;
958       ++n;
959     }
960   }
961   if (n != nleaves) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "There is a size mismatch in the SF embedding, %d != %d", n, nleaves);
962   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,newsf);CHKERRQ(ierr);
963   ierr = PetscSFSetGraph(*newsf,sf->nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
964   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 /*@C
969   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
970 
971   Collective
972 
973   Input Arguments:
974 + sf - original star forest
975 . nleaves - number of leaves to select on this process
976 - selected - selected leaves on this process
977 
978   Output Arguments:
979 .  newsf - new star forest
980 
981   Level: advanced
982 
983 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
984 @*/
985 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nleaves, const PetscInt *selected, PetscSF *newsf)
986 {
987   PetscSFNode   *iremote;
988   PetscInt      *ilocal;
989   PetscInt       i;
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
994   PetscSFCheckGraphSet(sf, 1);
995   if (nleaves) PetscValidPointer(selected, 3);
996   PetscValidPointer(newsf, 4);
997   ierr = PetscMalloc1(nleaves, &ilocal);CHKERRQ(ierr);
998   ierr = PetscMalloc1(nleaves, &iremote);CHKERRQ(ierr);
999   for (i = 0; i < nleaves; ++i) {
1000     const PetscInt l = selected[i];
1001 
1002     ilocal[i]        = sf->mine ? sf->mine[l] : l;
1003     iremote[i].rank  = sf->remote[l].rank;
1004     iremote[i].index = sf->remote[l].index;
1005   }
1006   ierr = PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, newsf);CHKERRQ(ierr);
1007   ierr = PetscSFSetGraph(*newsf, sf->nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 /*@C
1012    PetscSFBcastBegin - begin pointwise broadcast to be concluded with call to PetscSFBcastEnd()
1013 
1014    Collective on PetscSF
1015 
1016    Input Arguments:
1017 +  sf - star forest on which to communicate
1018 .  unit - data type associated with each node
1019 -  rootdata - buffer to broadcast
1020 
1021    Output Arguments:
1022 .  leafdata - buffer to update with values from each leaf's respective root
1023 
1024    Level: intermediate
1025 
1026 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFView(), PetscSFBcastEnd(), PetscSFReduceBegin()
1027 @*/
1028 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1029 {
1030   PetscErrorCode ierr;
1031 
1032   PetscFunctionBegin;
1033   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1034   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1035   ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
1036   ierr = (*sf->ops->BcastBegin)(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
1037   ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 /*@C
1042    PetscSFBcastEnd - end a broadcast operation started with PetscSFBcastBegin()
1043 
1044    Collective
1045 
1046    Input Arguments:
1047 +  sf - star forest
1048 .  unit - data type
1049 -  rootdata - buffer to broadcast
1050 
1051    Output Arguments:
1052 .  leafdata - buffer to update with values from each leaf's respective root
1053 
1054    Level: intermediate
1055 
1056 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1057 @*/
1058 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1059 {
1060   PetscErrorCode ierr;
1061 
1062   PetscFunctionBegin;
1063   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1064   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1065   ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);
1066   ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
1067   ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /*@C
1072    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1073 
1074    Collective
1075 
1076    Input Arguments:
1077 +  sf - star forest
1078 .  unit - data type
1079 .  leafdata - values to reduce
1080 -  op - reduction operation
1081 
1082    Output Arguments:
1083 .  rootdata - result of reduction of values from all leaves of each root
1084 
1085    Level: intermediate
1086 
1087 .seealso: PetscSFBcastBegin()
1088 @*/
1089 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1090 {
1091   PetscErrorCode ierr;
1092 
1093   PetscFunctionBegin;
1094   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1095   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1096   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1097   ierr = (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1098   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 /*@C
1103    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1104 
1105    Collective
1106 
1107    Input Arguments:
1108 +  sf - star forest
1109 .  unit - data type
1110 .  leafdata - values to reduce
1111 -  op - reduction operation
1112 
1113    Output Arguments:
1114 .  rootdata - result of reduction of values from all leaves of each root
1115 
1116    Level: intermediate
1117 
1118 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1119 @*/
1120 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1121 {
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1126   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1127   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1128   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1129   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 /*@C
1134    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1135 
1136    Collective
1137 
1138    Input Arguments:
1139 .  sf - star forest
1140 
1141    Output Arguments:
1142 .  degree - degree of each root vertex
1143 
1144    Level: advanced
1145 
1146 .seealso: PetscSFGatherBegin()
1147 @*/
1148 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1149 {
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1154   PetscSFCheckGraphSet(sf,1);
1155   PetscValidPointer(degree,2);
1156   if (!sf->degreeknown) {
1157     PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
1158     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1159     ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
1160     ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1161     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1162     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1163     ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
1164   }
1165   *degree = NULL;
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 /*@C
1170    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1171 
1172    Collective
1173 
1174    Input Arguments:
1175 .  sf - star forest
1176 
1177    Output Arguments:
1178 .  degree - degree of each root vertex
1179 
1180    Level: developer
1181 
1182 .seealso:
1183 @*/
1184 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1185 {
1186   PetscErrorCode ierr;
1187 
1188   PetscFunctionBegin;
1189   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1190   PetscSFCheckGraphSet(sf,1);
1191   PetscValidPointer(degree,2);
1192   if (!sf->degreeknown) {
1193     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1194     ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
1195     ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
1196     sf->degreeknown = PETSC_TRUE;
1197   }
1198   *degree = sf->degree;
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 /*@C
1203    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1204 
1205    Collective
1206 
1207    Input Arguments:
1208 +  sf - star forest
1209 .  unit - data type
1210 .  leafdata - leaf values to use in reduction
1211 -  op - operation to use for reduction
1212 
1213    Output Arguments:
1214 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1215 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1216 
1217    Level: advanced
1218 
1219    Note:
1220    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1221    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1222    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1223    integers.
1224 
1225 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1226 @*/
1227 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1228 {
1229   PetscErrorCode ierr;
1230 
1231   PetscFunctionBegin;
1232   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1233   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1234   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1235   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1236   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*@C
1241    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1242 
1243    Collective
1244 
1245    Input Arguments:
1246 +  sf - star forest
1247 .  unit - data type
1248 .  leafdata - leaf values to use in reduction
1249 -  op - operation to use for reduction
1250 
1251    Output Arguments:
1252 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1253 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1254 
1255    Level: advanced
1256 
1257 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1258 @*/
1259 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1260 {
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1265   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1266   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1267   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1268   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 /*@C
1273    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1274 
1275    Collective
1276 
1277    Input Arguments:
1278 +  sf - star forest
1279 .  unit - data type
1280 -  leafdata - leaf data to gather to roots
1281 
1282    Output Argument:
1283 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1284 
1285    Level: intermediate
1286 
1287 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1288 @*/
1289 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1290 {
1291   PetscErrorCode ierr;
1292   PetscSF        multi;
1293 
1294   PetscFunctionBegin;
1295   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1296   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1297   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1298   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 /*@C
1303    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1304 
1305    Collective
1306 
1307    Input Arguments:
1308 +  sf - star forest
1309 .  unit - data type
1310 -  leafdata - leaf data to gather to roots
1311 
1312    Output Argument:
1313 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1314 
1315    Level: intermediate
1316 
1317 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1318 @*/
1319 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1320 {
1321   PetscErrorCode ierr;
1322   PetscSF        multi;
1323 
1324   PetscFunctionBegin;
1325   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1326   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1327   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1328   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 /*@C
1333    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1334 
1335    Collective
1336 
1337    Input Arguments:
1338 +  sf - star forest
1339 .  unit - data type
1340 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1341 
1342    Output Argument:
1343 .  leafdata - leaf data to be update with personal data from each respective root
1344 
1345    Level: intermediate
1346 
1347 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1348 @*/
1349 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1350 {
1351   PetscErrorCode ierr;
1352   PetscSF        multi;
1353 
1354   PetscFunctionBegin;
1355   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1356   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1357   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1358   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 /*@C
1363    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1364 
1365    Collective
1366 
1367    Input Arguments:
1368 +  sf - star forest
1369 .  unit - data type
1370 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1371 
1372    Output Argument:
1373 .  leafdata - leaf data to be update with personal data from each respective root
1374 
1375    Level: intermediate
1376 
1377 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1378 @*/
1379 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1380 {
1381   PetscErrorCode ierr;
1382   PetscSF        multi;
1383 
1384   PetscFunctionBegin;
1385   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1386   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1387   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1388   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*@
1393   PetscSFCompose - Compose a new PetscSF equivalent to action to PetscSFs
1394 
1395   Input Parameters:
1396 + sfA - The first PetscSF
1397 - sfB - The second PetscSF
1398 
1399   Output Parameters:
1400 . sfBA - equvalent PetscSF for applying A then B
1401 
1402   Level: developer
1403 
1404 .seealso: PetscSF, PetscSFGetGraph(), PetscSFSetGraph()
1405 @*/
1406 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1407 {
1408   MPI_Comm           comm;
1409   const PetscSFNode *remotePointsA, *remotePointsB;
1410   PetscSFNode       *remotePointsBA;
1411   const PetscInt    *localPointsA, *localPointsB;
1412   PetscInt           numRootsA, numLeavesA, numRootsB, numLeavesB;
1413   PetscErrorCode     ierr;
1414 
1415   PetscFunctionBegin;
1416   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
1417   PetscSFCheckGraphSet(sfA, 1);
1418   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
1419   PetscSFCheckGraphSet(sfB, 2);
1420   PetscValidPointer(sfBA, 3);
1421   ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr);
1422   ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
1423   ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
1424   ierr = PetscMalloc1(numLeavesB, &remotePointsBA);CHKERRQ(ierr);
1425   ierr = PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, remotePointsBA);CHKERRQ(ierr);
1426   ierr = PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, remotePointsBA);CHKERRQ(ierr);
1427   ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr);
1428   ierr = PetscSFSetGraph(*sfBA, numRootsA, numLeavesB, localPointsB, PETSC_COPY_VALUES, remotePointsBA, PETSC_OWN_POINTER);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431