1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petscctable.h> 4 5 #if defined(PETSC_USE_DEBUG) 6 # define PetscSFCheckGraphSet(sf,arg) do { \ 7 if (PetscUnlikely(!(sf)->graphset)) \ 8 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 9 } while (0) 10 #else 11 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 12 #endif 13 14 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0}; 15 16 /*@ 17 PetscSFCreate - create a star forest communication context 18 19 Collective 20 21 Input Arguments: 22 . comm - communicator on which the star forest will operate 23 24 Output Arguments: 25 . sf - new star forest context 26 27 Options Database Keys: 28 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 29 . -sf_type window -Use MPI-3 one-sided window for communication 30 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 31 32 Level: intermediate 33 34 Notes: 35 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 36 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 37 SFs are optimized and they have better performance than general SFs. 38 39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 40 @*/ 41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 42 { 43 PetscErrorCode ierr; 44 PetscSF b; 45 46 PetscFunctionBegin; 47 PetscValidPointer(sf,2); 48 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 49 50 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 51 52 b->nroots = -1; 53 b->nleaves = -1; 54 b->minleaf = PETSC_MAX_INT; 55 b->maxleaf = PETSC_MIN_INT; 56 b->nranks = -1; 57 b->rankorder = PETSC_TRUE; 58 b->ingroup = MPI_GROUP_NULL; 59 b->outgroup = MPI_GROUP_NULL; 60 b->graphset = PETSC_FALSE; 61 62 *sf = b; 63 PetscFunctionReturn(0); 64 } 65 66 /*@ 67 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 68 69 Collective 70 71 Input Arguments: 72 . sf - star forest 73 74 Level: advanced 75 76 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 77 @*/ 78 PetscErrorCode PetscSFReset(PetscSF sf) 79 { 80 PetscErrorCode ierr; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 84 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 85 sf->nroots = -1; 86 sf->nleaves = -1; 87 sf->minleaf = PETSC_MAX_INT; 88 sf->maxleaf = PETSC_MIN_INT; 89 sf->mine = NULL; 90 sf->remote = NULL; 91 sf->graphset = PETSC_FALSE; 92 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 93 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 94 sf->nranks = -1; 95 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 96 #if defined(PETSC_HAVE_CUDA) 97 { 98 PetscInt i; 99 for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}} 100 } 101 #endif 102 sf->degreeknown = PETSC_FALSE; 103 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 104 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);} 105 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);} 106 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 107 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 108 sf->setupcalled = PETSC_FALSE; 109 PetscFunctionReturn(0); 110 } 111 112 /*@C 113 PetscSFSetType - Set the PetscSF communication implementation 114 115 Collective on PetscSF 116 117 Input Parameters: 118 + sf - the PetscSF context 119 - type - a known method 120 121 Options Database Key: 122 . -sf_type <type> - Sets the method; use -help for a list 123 of available methods (for instance, window, basic, neighbor) 124 125 Notes: 126 See "include/petscsf.h" for available methods (for instance) 127 + PETSCSFWINDOW - MPI-2/3 one-sided 128 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 129 130 Level: intermediate 131 132 .seealso: PetscSFType, PetscSFCreate() 133 @*/ 134 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 135 { 136 PetscErrorCode ierr,(*r)(PetscSF); 137 PetscBool match; 138 139 PetscFunctionBegin; 140 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 141 PetscValidCharPointer(type,2); 142 143 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 144 if (match) PetscFunctionReturn(0); 145 146 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 147 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 148 /* Destroy the previous PetscSF implementation context */ 149 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 150 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 151 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 152 ierr = (*r)(sf);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 /*@C 157 PetscSFGetType - Get the PetscSF communication implementation 158 159 Not Collective 160 161 Input Parameter: 162 . sf - the PetscSF context 163 164 Output Parameter: 165 . type - the PetscSF type name 166 167 Level: intermediate 168 169 .seealso: PetscSFSetType(), PetscSFCreate() 170 @*/ 171 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 172 { 173 PetscFunctionBegin; 174 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 175 PetscValidPointer(type,2); 176 *type = ((PetscObject)sf)->type_name; 177 PetscFunctionReturn(0); 178 } 179 180 /*@ 181 PetscSFDestroy - destroy star forest 182 183 Collective 184 185 Input Arguments: 186 . sf - address of star forest 187 188 Level: intermediate 189 190 .seealso: PetscSFCreate(), PetscSFReset() 191 @*/ 192 PetscErrorCode PetscSFDestroy(PetscSF *sf) 193 { 194 PetscErrorCode ierr; 195 196 PetscFunctionBegin; 197 if (!*sf) PetscFunctionReturn(0); 198 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 199 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 200 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 201 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 202 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 203 PetscFunctionReturn(0); 204 } 205 206 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 207 { 208 #if defined(PETSC_USE_DEBUG) 209 PetscInt i, nleaves; 210 PetscMPIInt size; 211 const PetscInt *ilocal; 212 const PetscSFNode *iremote; 213 PetscErrorCode ierr; 214 215 PetscFunctionBegin; 216 if (!sf->graphset) PetscFunctionReturn(0); 217 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 218 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 219 for (i = 0; i < nleaves; i++) { 220 const PetscInt rank = iremote[i].rank; 221 const PetscInt remote = iremote[i].index; 222 const PetscInt leaf = ilocal ? ilocal[i] : i; 223 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 224 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 225 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 226 } 227 PetscFunctionReturn(0); 228 #else 229 PetscFunctionBegin; 230 PetscFunctionReturn(0); 231 #endif 232 } 233 234 /*@ 235 PetscSFSetUp - set up communication structures 236 237 Collective 238 239 Input Arguments: 240 . sf - star forest communication object 241 242 Level: beginner 243 244 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 245 @*/ 246 PetscErrorCode PetscSFSetUp(PetscSF sf) 247 { 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 252 PetscSFCheckGraphSet(sf,1); 253 if (sf->setupcalled) PetscFunctionReturn(0); 254 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 255 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} 256 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 257 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 258 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 259 sf->setupcalled = PETSC_TRUE; 260 PetscFunctionReturn(0); 261 } 262 263 /*@ 264 PetscSFSetFromOptions - set PetscSF options using the options database 265 266 Logically Collective 267 268 Input Arguments: 269 . sf - star forest 270 271 Options Database Keys: 272 + -sf_type - implementation type, see PetscSFSetType() 273 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 274 - -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 275 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller. 276 277 Level: intermediate 278 @*/ 279 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 280 { 281 PetscSFType deft; 282 char type[256]; 283 PetscErrorCode ierr; 284 PetscBool flg; 285 286 PetscFunctionBegin; 287 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 288 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 289 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 290 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 291 ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr); 292 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 293 sf->use_default_stream = PETSC_FALSE; 294 ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr); 295 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 296 ierr = PetscOptionsEnd();CHKERRQ(ierr); 297 PetscFunctionReturn(0); 298 } 299 300 /*@ 301 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 302 303 Logically Collective 304 305 Input Arguments: 306 + sf - star forest 307 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 308 309 Level: advanced 310 311 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 312 @*/ 313 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 314 { 315 PetscFunctionBegin; 316 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 317 PetscValidLogicalCollectiveBool(sf,flg,2); 318 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 319 sf->rankorder = flg; 320 PetscFunctionReturn(0); 321 } 322 323 /*@ 324 PetscSFSetGraph - Set a parallel star forest 325 326 Collective 327 328 Input Arguments: 329 + sf - star forest 330 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 331 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 332 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 333 during setup in debug mode) 334 . localmode - copy mode for ilocal 335 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 336 during setup in debug mode) 337 - remotemode - copy mode for iremote 338 339 Level: intermediate 340 341 Notes: 342 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 343 344 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 345 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 346 needed 347 348 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 349 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 350 351 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 352 @*/ 353 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 354 { 355 PetscErrorCode ierr; 356 357 PetscFunctionBegin; 358 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 359 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 360 if (nleaves > 0) PetscValidPointer(iremote,6); 361 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 362 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 363 364 if (sf->nroots >= 0) { /* Reset only if graph already set */ 365 ierr = PetscSFReset(sf);CHKERRQ(ierr); 366 } 367 368 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 369 370 sf->nroots = nroots; 371 sf->nleaves = nleaves; 372 373 if (nleaves && ilocal) { 374 PetscInt i; 375 PetscInt minleaf = PETSC_MAX_INT; 376 PetscInt maxleaf = PETSC_MIN_INT; 377 int contiguous = 1; 378 for (i=0; i<nleaves; i++) { 379 minleaf = PetscMin(minleaf,ilocal[i]); 380 maxleaf = PetscMax(maxleaf,ilocal[i]); 381 contiguous &= (ilocal[i] == i); 382 } 383 sf->minleaf = minleaf; 384 sf->maxleaf = maxleaf; 385 if (contiguous) { 386 if (localmode == PETSC_OWN_POINTER) { 387 ierr = PetscFree(ilocal);CHKERRQ(ierr); 388 } 389 ilocal = NULL; 390 } 391 } else { 392 sf->minleaf = 0; 393 sf->maxleaf = nleaves - 1; 394 } 395 396 if (ilocal) { 397 switch (localmode) { 398 case PETSC_COPY_VALUES: 399 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 400 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 401 sf->mine = sf->mine_alloc; 402 break; 403 case PETSC_OWN_POINTER: 404 sf->mine_alloc = (PetscInt*)ilocal; 405 sf->mine = sf->mine_alloc; 406 break; 407 case PETSC_USE_POINTER: 408 sf->mine_alloc = NULL; 409 sf->mine = (PetscInt*)ilocal; 410 break; 411 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 412 } 413 } 414 415 switch (remotemode) { 416 case PETSC_COPY_VALUES: 417 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 418 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 419 sf->remote = sf->remote_alloc; 420 break; 421 case PETSC_OWN_POINTER: 422 sf->remote_alloc = (PetscSFNode*)iremote; 423 sf->remote = sf->remote_alloc; 424 break; 425 case PETSC_USE_POINTER: 426 sf->remote_alloc = NULL; 427 sf->remote = (PetscSFNode*)iremote; 428 break; 429 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 430 } 431 432 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 433 sf->graphset = PETSC_TRUE; 434 PetscFunctionReturn(0); 435 } 436 437 /*@ 438 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 439 440 Collective 441 442 Input Parameters: 443 + sf - The PetscSF 444 . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 445 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 446 447 Notes: 448 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 449 n and N are local and global sizes of x respectively. 450 451 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 452 sequential vectors y on all ranks. 453 454 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 455 sequential vector y on rank 0. 456 457 In above cases, entries of x are roots and entries of y are leaves. 458 459 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 460 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 461 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 462 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 463 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 464 465 In this case, roots and leaves are symmetric. 466 467 Level: intermediate 468 @*/ 469 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 470 { 471 MPI_Comm comm; 472 PetscInt n,N,res[2]; 473 PetscMPIInt rank,size; 474 PetscSFType type; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 479 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 480 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 481 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 482 483 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 484 type = PETSCSFALLTOALL; 485 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 486 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 487 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 488 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 489 } else { 490 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 491 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 492 res[0] = n; 493 res[1] = -n; 494 /* Check if n are same over all ranks so that we can optimize it */ 495 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 496 if (res[0] == -res[1]) { /* same n */ 497 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 498 } else { 499 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 500 } 501 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 502 } 503 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 504 505 sf->pattern = pattern; 506 sf->mine = NULL; /* Contiguous */ 507 508 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 509 Also set other easy stuff. 510 */ 511 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 512 sf->nleaves = N; 513 sf->nroots = n; 514 sf->nranks = size; 515 sf->minleaf = 0; 516 sf->maxleaf = N - 1; 517 } else if (pattern == PETSCSF_PATTERN_GATHER) { 518 sf->nleaves = rank ? 0 : N; 519 sf->nroots = n; 520 sf->nranks = rank ? 0 : size; 521 sf->minleaf = 0; 522 sf->maxleaf = rank ? -1 : N - 1; 523 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 524 sf->nleaves = size; 525 sf->nroots = size; 526 sf->nranks = size; 527 sf->minleaf = 0; 528 sf->maxleaf = size - 1; 529 } 530 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 531 sf->graphset = PETSC_TRUE; 532 PetscFunctionReturn(0); 533 } 534 535 /*@ 536 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 537 538 Collective 539 540 Input Arguments: 541 542 . sf - star forest to invert 543 544 Output Arguments: 545 . isf - inverse of sf 546 Level: advanced 547 548 Notes: 549 All roots must have degree 1. 550 551 The local space may be a permutation, but cannot be sparse. 552 553 .seealso: PetscSFSetGraph() 554 @*/ 555 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 556 { 557 PetscErrorCode ierr; 558 PetscMPIInt rank; 559 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 560 const PetscInt *ilocal; 561 PetscSFNode *roots,*leaves; 562 563 PetscFunctionBegin; 564 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 565 PetscSFCheckGraphSet(sf,1); 566 PetscValidPointer(isf,2); 567 568 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 569 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 570 571 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 572 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 573 for (i=0; i<maxlocal; i++) { 574 leaves[i].rank = rank; 575 leaves[i].index = i; 576 } 577 for (i=0; i <nroots; i++) { 578 roots[i].rank = -1; 579 roots[i].index = -1; 580 } 581 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 582 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 583 584 /* Check whether our leaves are sparse */ 585 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 586 if (count == nroots) newilocal = NULL; 587 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ 588 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 589 for (i=0,count=0; i<nroots; i++) { 590 if (roots[i].rank >= 0) { 591 newilocal[count] = i; 592 roots[count].rank = roots[i].rank; 593 roots[count].index = roots[i].index; 594 count++; 595 } 596 } 597 } 598 599 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 600 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 601 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 /*@ 606 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 607 608 Collective 609 610 Input Arguments: 611 + sf - communication object to duplicate 612 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 613 614 Output Arguments: 615 . newsf - new communication object 616 617 Level: beginner 618 619 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 620 @*/ 621 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 622 { 623 PetscSFType type; 624 PetscErrorCode ierr; 625 626 PetscFunctionBegin; 627 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 628 PetscValidLogicalCollectiveEnum(sf,opt,2); 629 PetscValidPointer(newsf,3); 630 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 631 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 632 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 633 if (opt == PETSCSF_DUPLICATE_GRAPH) { 634 PetscSFCheckGraphSet(sf,1); 635 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 636 PetscInt nroots,nleaves; 637 const PetscInt *ilocal; 638 const PetscSFNode *iremote; 639 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 640 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 641 } else { 642 ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 643 } 644 } 645 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 646 PetscFunctionReturn(0); 647 } 648 649 /*@C 650 PetscSFGetGraph - Get the graph specifying a parallel star forest 651 652 Not Collective 653 654 Input Arguments: 655 . sf - star forest 656 657 Output Arguments: 658 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 659 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 660 . ilocal - locations of leaves in leafdata buffers 661 - iremote - remote locations of root vertices for each leaf on the current process 662 663 Notes: 664 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 665 666 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 667 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 668 669 Level: intermediate 670 671 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 672 @*/ 673 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 674 { 675 PetscErrorCode ierr; 676 677 PetscFunctionBegin; 678 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 679 if (sf->ops->GetGraph) { 680 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 681 } else { 682 if (nroots) *nroots = sf->nroots; 683 if (nleaves) *nleaves = sf->nleaves; 684 if (ilocal) *ilocal = sf->mine; 685 if (iremote) *iremote = sf->remote; 686 } 687 PetscFunctionReturn(0); 688 } 689 690 /*@ 691 PetscSFGetLeafRange - Get the active leaf ranges 692 693 Not Collective 694 695 Input Arguments: 696 . sf - star forest 697 698 Output Arguments: 699 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 700 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 701 702 Level: developer 703 704 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 705 @*/ 706 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 707 { 708 709 PetscFunctionBegin; 710 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 711 PetscSFCheckGraphSet(sf,1); 712 if (minleaf) *minleaf = sf->minleaf; 713 if (maxleaf) *maxleaf = sf->maxleaf; 714 PetscFunctionReturn(0); 715 } 716 717 /*@C 718 PetscSFViewFromOptions - View from Options 719 720 Collective on PetscSF 721 722 Input Parameters: 723 + A - the star forest 724 . obj - Optional object 725 - name - command line option 726 727 Level: intermediate 728 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 729 @*/ 730 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 731 { 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 736 ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 /*@C 741 PetscSFView - view a star forest 742 743 Collective 744 745 Input Arguments: 746 + sf - star forest 747 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 748 749 Level: beginner 750 751 .seealso: PetscSFCreate(), PetscSFSetGraph() 752 @*/ 753 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 754 { 755 PetscErrorCode ierr; 756 PetscBool iascii; 757 PetscViewerFormat format; 758 759 PetscFunctionBegin; 760 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 761 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 762 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 763 PetscCheckSameComm(sf,1,viewer,2); 764 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 765 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 766 if (iascii) { 767 PetscMPIInt rank; 768 PetscInt ii,i,j; 769 770 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 771 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 772 if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 773 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 774 if (!sf->graphset) { 775 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 776 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 780 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 781 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 782 for (i=0; i<sf->nleaves; i++) { 783 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 784 } 785 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 786 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 787 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 788 PetscMPIInt *tmpranks,*perm; 789 ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 790 ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 791 for (i=0; i<sf->nranks; i++) perm[i] = i; 792 ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 794 for (ii=0; ii<sf->nranks; ii++) { 795 i = perm[ii]; 796 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 797 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 798 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 799 } 800 } 801 ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 802 } 803 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 804 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 805 } 806 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 /*@C 812 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 813 814 Not Collective 815 816 Input Arguments: 817 . sf - star forest 818 819 Output Arguments: 820 + nranks - number of ranks referenced by local part 821 . ranks - array of ranks 822 . roffset - offset in rmine/rremote for each rank (length nranks+1) 823 . rmine - concatenated array holding local indices referencing each remote rank 824 - rremote - concatenated array holding remote indices referenced for each remote rank 825 826 Level: developer 827 828 .seealso: PetscSFGetLeafRanks() 829 @*/ 830 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 831 { 832 PetscErrorCode ierr; 833 834 PetscFunctionBegin; 835 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 836 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 837 if (sf->ops->GetRootRanks) { 838 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 839 } else { 840 /* The generic implementation */ 841 if (nranks) *nranks = sf->nranks; 842 if (ranks) *ranks = sf->ranks; 843 if (roffset) *roffset = sf->roffset; 844 if (rmine) *rmine = sf->rmine; 845 if (rremote) *rremote = sf->rremote; 846 } 847 PetscFunctionReturn(0); 848 } 849 850 /*@C 851 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 852 853 Not Collective 854 855 Input Arguments: 856 . sf - star forest 857 858 Output Arguments: 859 + niranks - number of leaf ranks referencing roots on this process 860 . iranks - array of ranks 861 . ioffset - offset in irootloc for each rank (length niranks+1) 862 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 863 864 Level: developer 865 866 .seealso: PetscSFGetRootRanks() 867 @*/ 868 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 869 { 870 PetscErrorCode ierr; 871 872 PetscFunctionBegin; 873 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 874 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 875 if (sf->ops->GetLeafRanks) { 876 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 877 } else { 878 PetscSFType type; 879 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 880 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 881 } 882 PetscFunctionReturn(0); 883 } 884 885 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 886 PetscInt i; 887 for (i=0; i<n; i++) { 888 if (needle == list[i]) return PETSC_TRUE; 889 } 890 return PETSC_FALSE; 891 } 892 893 /*@C 894 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 895 896 Collective 897 898 Input Arguments: 899 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 900 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 901 902 Level: developer 903 904 .seealso: PetscSFGetRootRanks() 905 @*/ 906 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 907 { 908 PetscErrorCode ierr; 909 PetscTable table; 910 PetscTablePosition pos; 911 PetscMPIInt size,groupsize,*groupranks; 912 PetscInt *rcount,*ranks; 913 PetscInt i, irank = -1,orank = -1; 914 915 PetscFunctionBegin; 916 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 917 PetscSFCheckGraphSet(sf,1); 918 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 919 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 920 for (i=0; i<sf->nleaves; i++) { 921 /* Log 1-based rank */ 922 ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 923 } 924 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 925 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 926 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 927 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 928 for (i=0; i<sf->nranks; i++) { 929 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 930 ranks[i]--; /* Convert back to 0-based */ 931 } 932 ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 933 934 /* We expect that dgroup is reliably "small" while nranks could be large */ 935 { 936 MPI_Group group = MPI_GROUP_NULL; 937 PetscMPIInt *dgroupranks; 938 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 939 ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr); 940 ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 941 ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 942 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 943 if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);} 944 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 945 ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 946 } 947 948 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 949 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) { 950 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 951 if (InList(ranks[i],groupsize,groupranks)) break; 952 } 953 for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 954 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 955 } 956 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 957 PetscInt tmprank,tmpcount; 958 959 tmprank = ranks[i]; 960 tmpcount = rcount[i]; 961 ranks[i] = ranks[sf->ndranks]; 962 rcount[i] = rcount[sf->ndranks]; 963 ranks[sf->ndranks] = tmprank; 964 rcount[sf->ndranks] = tmpcount; 965 sf->ndranks++; 966 } 967 } 968 ierr = PetscFree(groupranks);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 970 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 971 sf->roffset[0] = 0; 972 for (i=0; i<sf->nranks; i++) { 973 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 974 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 975 rcount[i] = 0; 976 } 977 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 978 /* short circuit */ 979 if (orank != sf->remote[i].rank) { 980 /* Search for index of iremote[i].rank in sf->ranks */ 981 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 982 if (irank < 0) { 983 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 984 if (irank >= 0) irank += sf->ndranks; 985 } 986 orank = sf->remote[i].rank; 987 } 988 if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 989 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 990 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 991 rcount[irank]++; 992 } 993 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 /*@C 998 PetscSFGetGroups - gets incoming and outgoing process groups 999 1000 Collective 1001 1002 Input Argument: 1003 . sf - star forest 1004 1005 Output Arguments: 1006 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1007 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1008 1009 Level: developer 1010 1011 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1012 @*/ 1013 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1014 { 1015 PetscErrorCode ierr; 1016 MPI_Group group = MPI_GROUP_NULL; 1017 1018 PetscFunctionBegin; 1019 if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1020 if (sf->ingroup == MPI_GROUP_NULL) { 1021 PetscInt i; 1022 const PetscInt *indegree; 1023 PetscMPIInt rank,*outranks,*inranks; 1024 PetscSFNode *remote; 1025 PetscSF bgcount; 1026 1027 /* Compute the number of incoming ranks */ 1028 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 1029 for (i=0; i<sf->nranks; i++) { 1030 remote[i].rank = sf->ranks[i]; 1031 remote[i].index = 0; 1032 } 1033 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 1034 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1035 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 1036 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 1037 /* Enumerate the incoming ranks */ 1038 ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1039 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1040 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1041 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1042 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1043 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1044 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr); 1045 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1046 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1047 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1048 } 1049 *incoming = sf->ingroup; 1050 1051 if (sf->outgroup == MPI_GROUP_NULL) { 1052 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1053 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr); 1054 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1055 } 1056 *outgoing = sf->outgroup; 1057 PetscFunctionReturn(0); 1058 } 1059 1060 /*@ 1061 PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters 1062 1063 Collective 1064 1065 Input Argument: 1066 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1067 1068 Output Arguments: 1069 . multi - star forest with split roots, such that each root has degree exactly 1 1070 1071 Level: developer 1072 1073 Notes: 1074 1075 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1076 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1077 edge, it is a candidate for future optimization that might involve its removal. 1078 1079 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1080 @*/ 1081 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1082 { 1083 PetscErrorCode ierr; 1084 1085 PetscFunctionBegin; 1086 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1087 PetscValidPointer(multi,2); 1088 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1089 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1090 *multi = sf->multi; 1091 PetscFunctionReturn(0); 1092 } 1093 if (!sf->multi) { 1094 const PetscInt *indegree; 1095 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1096 PetscSFNode *remote; 1097 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1098 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1099 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1100 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1101 inoffset[0] = 0; 1102 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1103 for (i=0; i<maxlocal; i++) outones[i] = 1; 1104 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1105 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1106 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1107 #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */ 1108 for (i=0; i<sf->nroots; i++) { 1109 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1110 } 1111 #endif 1112 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1113 for (i=0; i<sf->nleaves; i++) { 1114 remote[i].rank = sf->remote[i].rank; 1115 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1116 } 1117 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1118 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1119 if (sf->rankorder) { /* Sort the ranks */ 1120 PetscMPIInt rank; 1121 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1122 PetscSFNode *newremote; 1123 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1124 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1125 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1126 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1127 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1128 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1129 /* Sort the incoming ranks at each vertex, build the inverse map */ 1130 for (i=0; i<sf->nroots; i++) { 1131 PetscInt j; 1132 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1133 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1134 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1135 } 1136 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1137 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1138 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1139 for (i=0; i<sf->nleaves; i++) { 1140 newremote[i].rank = sf->remote[i].rank; 1141 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1142 } 1143 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1144 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1145 } 1146 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1147 } 1148 *multi = sf->multi; 1149 PetscFunctionReturn(0); 1150 } 1151 1152 /*@C 1153 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1154 1155 Collective 1156 1157 Input Arguments: 1158 + sf - original star forest 1159 . nselected - number of selected roots on this process 1160 - selected - indices of the selected roots on this process 1161 1162 Output Arguments: 1163 . esf - new star forest 1164 1165 Level: advanced 1166 1167 Note: 1168 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1169 be done by calling PetscSFGetGraph(). 1170 1171 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1172 @*/ 1173 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1174 { 1175 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1176 const PetscInt *ilocal; 1177 signed char *rootdata,*leafdata,*leafmem; 1178 const PetscSFNode *iremote; 1179 PetscSFNode *new_iremote; 1180 MPI_Comm comm; 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1185 PetscSFCheckGraphSet(sf,1); 1186 if (nselected) PetscValidPointer(selected,3); 1187 PetscValidPointer(esf,4); 1188 1189 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1190 1191 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1192 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1193 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1194 1195 #if defined(PETSC_USE_DEBUG) 1196 /* Error out if selected[] has dups or out of range indices */ 1197 { 1198 PetscBool dups; 1199 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1200 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1201 for (i=0; i<nselected; i++) 1202 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1203 } 1204 #endif 1205 1206 if (sf->ops->CreateEmbeddedSF) { 1207 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1208 } else { 1209 /* A generic version of creating embedded sf */ 1210 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1211 maxlocal = maxleaf - minleaf + 1; 1212 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1213 leafdata = leafmem - minleaf; 1214 /* Tag selected roots and bcast to leaves */ 1215 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1216 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1217 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1218 1219 /* Build esf with leaves that are still connected */ 1220 esf_nleaves = 0; 1221 for (i=0; i<nleaves; i++) { 1222 j = ilocal ? ilocal[i] : i; 1223 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1224 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1225 */ 1226 esf_nleaves += (leafdata[j] ? 1 : 0); 1227 } 1228 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1229 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1230 for (i=n=0; i<nleaves; i++) { 1231 j = ilocal ? ilocal[i] : i; 1232 if (leafdata[j]) { 1233 new_ilocal[n] = j; 1234 new_iremote[n].rank = iremote[i].rank; 1235 new_iremote[n].index = iremote[i].index; 1236 ++n; 1237 } 1238 } 1239 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1240 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1241 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1242 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1243 } 1244 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 /*@C 1249 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1250 1251 Collective 1252 1253 Input Arguments: 1254 + sf - original star forest 1255 . nselected - number of selected leaves on this process 1256 - selected - indices of the selected leaves on this process 1257 1258 Output Arguments: 1259 . newsf - new star forest 1260 1261 Level: advanced 1262 1263 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1264 @*/ 1265 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1266 { 1267 const PetscSFNode *iremote; 1268 PetscSFNode *new_iremote; 1269 const PetscInt *ilocal; 1270 PetscInt i,nroots,*leaves,*new_ilocal; 1271 MPI_Comm comm; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1276 PetscSFCheckGraphSet(sf,1); 1277 if (nselected) PetscValidPointer(selected,3); 1278 PetscValidPointer(newsf,4); 1279 1280 /* Uniq selected[] and put results in leaves[] */ 1281 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1282 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1283 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1284 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1285 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1286 1287 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1288 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1289 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1292 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1293 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1294 for (i=0; i<nselected; ++i) { 1295 const PetscInt l = leaves[i]; 1296 new_ilocal[i] = ilocal ? ilocal[l] : l; 1297 new_iremote[i].rank = iremote[l].rank; 1298 new_iremote[i].index = iremote[l].index; 1299 } 1300 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1301 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1302 } 1303 ierr = PetscFree(leaves);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 /*@C 1308 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1309 1310 Collective on PetscSF 1311 1312 Input Arguments: 1313 + sf - star forest on which to communicate 1314 . unit - data type associated with each node 1315 . rootdata - buffer to broadcast 1316 - op - operation to use for reduction 1317 1318 Output Arguments: 1319 . leafdata - buffer to be reduced with values from each leaf's respective root 1320 1321 Level: intermediate 1322 1323 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1324 @*/ 1325 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1326 { 1327 PetscErrorCode ierr; 1328 PetscMemType rootmtype,leafmtype; 1329 1330 PetscFunctionBegin; 1331 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1332 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1333 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1334 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1335 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1336 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1337 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /*@C 1342 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1343 1344 Collective 1345 1346 Input Arguments: 1347 + sf - star forest 1348 . unit - data type 1349 . rootdata - buffer to broadcast 1350 - op - operation to use for reduction 1351 1352 Output Arguments: 1353 . leafdata - buffer to be reduced with values from each leaf's respective root 1354 1355 Level: intermediate 1356 1357 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1358 @*/ 1359 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1360 { 1361 PetscErrorCode ierr; 1362 PetscMemType rootmtype,leafmtype; 1363 1364 PetscFunctionBegin; 1365 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1366 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1367 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1368 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1369 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1370 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1371 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 /*@C 1376 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1377 1378 Collective 1379 1380 Input Arguments: 1381 + sf - star forest 1382 . unit - data type 1383 . leafdata - values to reduce 1384 - op - reduction operation 1385 1386 Output Arguments: 1387 . rootdata - result of reduction of values from all leaves of each root 1388 1389 Level: intermediate 1390 1391 .seealso: PetscSFBcastBegin() 1392 @*/ 1393 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1394 { 1395 PetscErrorCode ierr; 1396 PetscMemType rootmtype,leafmtype; 1397 1398 PetscFunctionBegin; 1399 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1400 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1401 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1402 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1403 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1404 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1405 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 /*@C 1410 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1411 1412 Collective 1413 1414 Input Arguments: 1415 + sf - star forest 1416 . unit - data type 1417 . leafdata - values to reduce 1418 - op - reduction operation 1419 1420 Output Arguments: 1421 . rootdata - result of reduction of values from all leaves of each root 1422 1423 Level: intermediate 1424 1425 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1426 @*/ 1427 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1428 { 1429 PetscErrorCode ierr; 1430 PetscMemType rootmtype,leafmtype; 1431 1432 PetscFunctionBegin; 1433 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1434 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1435 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1436 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1437 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1438 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1439 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442 1443 /*@C 1444 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1445 1446 Collective 1447 1448 Input Arguments: 1449 + sf - star forest 1450 . unit - data type 1451 . leafdata - leaf values to use in reduction 1452 - op - operation to use for reduction 1453 1454 Output Arguments: 1455 + rootdata - root values to be updated, input state is seen by first process to perform an update 1456 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1457 1458 Level: advanced 1459 1460 Note: 1461 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1462 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1463 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1464 integers. 1465 1466 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1467 @*/ 1468 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1469 { 1470 PetscErrorCode ierr; 1471 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1472 1473 PetscFunctionBegin; 1474 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1475 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1476 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1477 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1478 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1479 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1480 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1481 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1482 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 /*@C 1487 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1488 1489 Collective 1490 1491 Input Arguments: 1492 + sf - star forest 1493 . unit - data type 1494 . leafdata - leaf values to use in reduction 1495 - op - operation to use for reduction 1496 1497 Output Arguments: 1498 + rootdata - root values to be updated, input state is seen by first process to perform an update 1499 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1500 1501 Level: advanced 1502 1503 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1504 @*/ 1505 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1506 { 1507 PetscErrorCode ierr; 1508 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1509 1510 PetscFunctionBegin; 1511 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1512 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1513 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1514 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1515 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1516 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1517 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1518 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1519 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 /*@C 1524 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1525 1526 Collective 1527 1528 Input Arguments: 1529 . sf - star forest 1530 1531 Output Arguments: 1532 . degree - degree of each root vertex 1533 1534 Level: advanced 1535 1536 Notes: 1537 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1538 1539 .seealso: PetscSFGatherBegin() 1540 @*/ 1541 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1542 { 1543 PetscErrorCode ierr; 1544 1545 PetscFunctionBegin; 1546 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1547 PetscSFCheckGraphSet(sf,1); 1548 PetscValidPointer(degree,2); 1549 if (!sf->degreeknown) { 1550 PetscInt i, nroots = sf->nroots, maxlocal; 1551 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1552 maxlocal = sf->maxleaf-sf->minleaf+1; 1553 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1554 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1555 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1556 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1557 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1558 } 1559 *degree = NULL; 1560 PetscFunctionReturn(0); 1561 } 1562 1563 /*@C 1564 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1565 1566 Collective 1567 1568 Input Arguments: 1569 . sf - star forest 1570 1571 Output Arguments: 1572 . degree - degree of each root vertex 1573 1574 Level: developer 1575 1576 Notes: 1577 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1578 1579 .seealso: 1580 @*/ 1581 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1582 { 1583 PetscErrorCode ierr; 1584 1585 PetscFunctionBegin; 1586 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1587 PetscSFCheckGraphSet(sf,1); 1588 PetscValidPointer(degree,2); 1589 if (!sf->degreeknown) { 1590 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1591 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1592 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1593 sf->degreeknown = PETSC_TRUE; 1594 } 1595 *degree = sf->degree; 1596 PetscFunctionReturn(0); 1597 } 1598 1599 1600 /*@C 1601 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1602 Each multi-root is assigned index of the corresponding original root. 1603 1604 Collective 1605 1606 Input Arguments: 1607 + sf - star forest 1608 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1609 1610 Output Arguments: 1611 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1612 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1613 1614 Level: developer 1615 1616 Notes: 1617 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1618 1619 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1620 @*/ 1621 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1622 { 1623 PetscSF msf; 1624 PetscInt i, j, k, nroots, nmroots; 1625 PetscErrorCode ierr; 1626 1627 PetscFunctionBegin; 1628 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1629 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1630 if (nroots) PetscValidIntPointer(degree,2); 1631 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1632 PetscValidPointer(multiRootsOrigNumbering,4); 1633 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1634 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1635 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1636 for (i=0,j=0,k=0; i<nroots; i++) { 1637 if (!degree[i]) continue; 1638 for (j=0; j<degree[i]; j++,k++) { 1639 (*multiRootsOrigNumbering)[k] = i; 1640 } 1641 } 1642 #if defined(PETSC_USE_DEBUG) 1643 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1644 #endif 1645 if (nMultiRoots) *nMultiRoots = nmroots; 1646 PetscFunctionReturn(0); 1647 } 1648 1649 /*@C 1650 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1651 1652 Collective 1653 1654 Input Arguments: 1655 + sf - star forest 1656 . unit - data type 1657 - leafdata - leaf data to gather to roots 1658 1659 Output Argument: 1660 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1661 1662 Level: intermediate 1663 1664 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1665 @*/ 1666 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1667 { 1668 PetscErrorCode ierr; 1669 PetscSF multi; 1670 1671 PetscFunctionBegin; 1672 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1673 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1674 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1675 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1676 PetscFunctionReturn(0); 1677 } 1678 1679 /*@C 1680 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1681 1682 Collective 1683 1684 Input Arguments: 1685 + sf - star forest 1686 . unit - data type 1687 - leafdata - leaf data to gather to roots 1688 1689 Output Argument: 1690 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1691 1692 Level: intermediate 1693 1694 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1695 @*/ 1696 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1697 { 1698 PetscErrorCode ierr; 1699 PetscSF multi; 1700 1701 PetscFunctionBegin; 1702 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1703 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1704 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1705 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 /*@C 1710 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1711 1712 Collective 1713 1714 Input Arguments: 1715 + sf - star forest 1716 . unit - data type 1717 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1718 1719 Output Argument: 1720 . leafdata - leaf data to be update with personal data from each respective root 1721 1722 Level: intermediate 1723 1724 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1725 @*/ 1726 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1727 { 1728 PetscErrorCode ierr; 1729 PetscSF multi; 1730 1731 PetscFunctionBegin; 1732 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1733 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1734 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1735 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 /*@C 1740 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1741 1742 Collective 1743 1744 Input Arguments: 1745 + sf - star forest 1746 . unit - data type 1747 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1748 1749 Output Argument: 1750 . leafdata - leaf data to be update with personal data from each respective root 1751 1752 Level: intermediate 1753 1754 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1755 @*/ 1756 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1757 { 1758 PetscErrorCode ierr; 1759 PetscSF multi; 1760 1761 PetscFunctionBegin; 1762 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1763 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1764 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1765 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1770 { 1771 #if defined(PETSC_USE_DEBUG) 1772 PetscInt i, n, nleaves; 1773 const PetscInt *ilocal = NULL; 1774 PetscHSetI seen; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1779 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1780 for (i = 0; i < nleaves; i++) { 1781 const PetscInt leaf = ilocal ? ilocal[i] : i; 1782 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1783 } 1784 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1785 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1786 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 #else 1789 PetscFunctionBegin; 1790 PetscFunctionReturn(0); 1791 #endif 1792 } 1793 1794 /*@ 1795 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1796 1797 Input Parameters: 1798 + sfA - The first PetscSF 1799 - sfB - The second PetscSF 1800 1801 Output Parameters: 1802 . sfBA - The composite SF 1803 1804 Level: developer 1805 1806 Notes: 1807 Currently, the two SFs must be defined on congruent communicators and they must be true star 1808 forests, i.e. the same leaf is not connected with different roots. 1809 1810 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1811 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1812 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1813 Bcast on sfA, then a Bcast on sfB, on connected nodes. 1814 1815 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1816 @*/ 1817 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1818 { 1819 PetscErrorCode ierr; 1820 const PetscSFNode *remotePointsA,*remotePointsB; 1821 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1822 const PetscInt *localPointsA,*localPointsB; 1823 PetscInt *localPointsBA; 1824 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 1825 PetscBool denseB; 1826 1827 PetscFunctionBegin; 1828 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1829 PetscSFCheckGraphSet(sfA,1); 1830 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1831 PetscSFCheckGraphSet(sfB,2); 1832 PetscCheckSameComm(sfA,1,sfB,2); 1833 PetscValidPointer(sfBA,3); 1834 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1835 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1836 1837 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 1838 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 1839 if (localPointsA) { 1840 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 1841 for (i=0; i<numRootsB; i++) { 1842 reorderedRemotePointsA[i].rank = -1; 1843 reorderedRemotePointsA[i].index = -1; 1844 } 1845 for (i=0; i<numLeavesA; i++) { 1846 if (localPointsA[i] >= numRootsB) continue; 1847 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 1848 } 1849 remotePointsA = reorderedRemotePointsA; 1850 } 1851 ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 1852 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 1853 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1854 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1855 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1856 1857 denseB = (PetscBool)!localPointsB; 1858 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 1859 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 1860 else numLeavesBA++; 1861 } 1862 if (denseB) { 1863 localPointsBA = NULL; 1864 remotePointsBA = leafdataB; 1865 } else { 1866 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 1867 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 1868 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 1869 const PetscInt l = localPointsB ? localPointsB[i] : i; 1870 1871 if (leafdataB[l-minleaf].rank == -1) continue; 1872 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 1873 localPointsBA[numLeavesBA] = l; 1874 numLeavesBA++; 1875 } 1876 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 1877 } 1878 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 1879 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1880 PetscFunctionReturn(0); 1881 } 1882 1883 /*@ 1884 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 1885 1886 Input Parameters: 1887 + sfA - The first PetscSF 1888 - sfB - The second PetscSF 1889 1890 Output Parameters: 1891 . sfBA - The composite SF. 1892 1893 Level: developer 1894 1895 Notes: 1896 Currently, the two SFs must be defined on congruent communicators and they must be true star 1897 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 1898 second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 1899 1900 sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 1901 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 1902 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 1903 a Reduce on sfB, on connected roots. 1904 1905 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 1906 @*/ 1907 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1908 { 1909 PetscErrorCode ierr; 1910 const PetscSFNode *remotePointsA,*remotePointsB; 1911 PetscSFNode *remotePointsBA; 1912 const PetscInt *localPointsA,*localPointsB; 1913 PetscSFNode *reorderedRemotePointsA = NULL; 1914 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 1915 MPI_Op op; 1916 #if defined(PETSC_USE_64BIT_INDICES) 1917 PetscBool iswin; 1918 #endif 1919 1920 PetscFunctionBegin; 1921 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1922 PetscSFCheckGraphSet(sfA,1); 1923 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1924 PetscSFCheckGraphSet(sfB,2); 1925 PetscCheckSameComm(sfA,1,sfB,2); 1926 PetscValidPointer(sfBA,3); 1927 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1928 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1929 1930 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 1931 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 1932 1933 /* TODO: Check roots of sfB have degree of 1 */ 1934 /* Once we implement it, we can replace the MPI_MAXLOC 1935 with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect. 1936 We use MPI_MAXLOC only to have a deterministic output from this routine if 1937 the root condition is not meet. 1938 */ 1939 op = MPI_MAXLOC; 1940 #if defined(PETSC_USE_64BIT_INDICES) 1941 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 1942 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 1943 if (iswin) op = MPIU_REPLACE; 1944 #endif 1945 1946 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 1947 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 1948 for (i=0; i<maxleaf - minleaf + 1; i++) { 1949 reorderedRemotePointsA[i].rank = -1; 1950 reorderedRemotePointsA[i].index = -1; 1951 } 1952 if (localPointsA) { 1953 for (i=0; i<numLeavesA; i++) { 1954 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 1955 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 1956 } 1957 } else { 1958 for (i=0; i<numLeavesA; i++) { 1959 if (i > maxleaf || i < minleaf) continue; 1960 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 1961 } 1962 } 1963 1964 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 1965 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 1966 for (i=0; i<numRootsB; i++) { 1967 remotePointsBA[i].rank = -1; 1968 remotePointsBA[i].index = -1; 1969 } 1970 1971 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 1972 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 1973 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1974 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 1975 if (remotePointsBA[i].rank == -1) continue; 1976 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 1977 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 1978 localPointsBA[numLeavesBA] = i; 1979 numLeavesBA++; 1980 } 1981 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 1982 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1983 PetscFunctionReturn(0); 1984 } 1985 1986 /* 1987 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 1988 1989 Input Parameters: 1990 . sf - The global PetscSF 1991 1992 Output Parameters: 1993 . out - The local PetscSF 1994 */ 1995 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 1996 { 1997 MPI_Comm comm; 1998 PetscMPIInt myrank; 1999 const PetscInt *ilocal; 2000 const PetscSFNode *iremote; 2001 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 2002 PetscSFNode *liremote; 2003 PetscSF lsf; 2004 PetscErrorCode ierr; 2005 2006 PetscFunctionBegin; 2007 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2008 if (sf->ops->CreateLocalSF) { 2009 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 2010 } else { 2011 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2012 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2013 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 2014 2015 /* Find out local edges and build a local SF */ 2016 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2017 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2018 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2019 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2020 2021 for (i=j=0; i<nleaves; i++) { 2022 if (iremote[i].rank == (PetscInt)myrank) { 2023 lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2024 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2025 liremote[j].index = iremote[i].index; 2026 j++; 2027 } 2028 } 2029 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2030 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2031 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2032 *out = lsf; 2033 } 2034 PetscFunctionReturn(0); 2035 } 2036 2037 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2038 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2039 { 2040 PetscErrorCode ierr; 2041 PetscMemType rootmtype,leafmtype; 2042 2043 PetscFunctionBegin; 2044 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2045 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2046 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2047 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2048 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2049 if (sf->ops->BcastToZero) { 2050 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2051 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2052 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056