1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h> 3*53dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 495fce210SBarry Smith #include <petscctable.h> 595fce210SBarry Smith 67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 77fd2d3dbSJunchao Zhang #include <cuda_runtime.h> 87fd2d3dbSJunchao Zhang #endif 97fd2d3dbSJunchao Zhang 107fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP) 117fd2d3dbSJunchao Zhang #include <hip/hip_runtime.h> 127fd2d3dbSJunchao Zhang #endif 137fd2d3dbSJunchao Zhang 1495fce210SBarry Smith #if defined(PETSC_USE_DEBUG) 1595fce210SBarry Smith # define PetscSFCheckGraphSet(sf,arg) do { \ 1695fce210SBarry Smith if (PetscUnlikely(!(sf)->graphset)) \ 17dd5b3ca6SJunchao Zhang SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 1895fce210SBarry Smith } while (0) 1995fce210SBarry Smith #else 2095fce210SBarry Smith # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 2195fce210SBarry Smith #endif 2295fce210SBarry Smith 234c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 2495fce210SBarry Smith 257fd2d3dbSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type) 267fd2d3dbSJunchao Zhang { 277fd2d3dbSJunchao Zhang PetscFunctionBegin; 287fd2d3dbSJunchao Zhang PetscValidPointer(type,2); 297fd2d3dbSJunchao Zhang *type = PETSC_MEMTYPE_HOST; 307fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 317fd2d3dbSJunchao Zhang if (PetscCUDAInitialized && data) { 327fd2d3dbSJunchao Zhang cudaError_t cerr; 337fd2d3dbSJunchao Zhang struct cudaPointerAttributes attr; 347fd2d3dbSJunchao Zhang enum cudaMemoryType mtype; 357fd2d3dbSJunchao Zhang cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 367fd2d3dbSJunchao Zhang cudaGetLastError(); /* Reset the last error */ 377fd2d3dbSJunchao Zhang #if (CUDART_VERSION < 10000) 387fd2d3dbSJunchao Zhang mtype = attr.memoryType; 397fd2d3dbSJunchao Zhang #else 407fd2d3dbSJunchao Zhang mtype = attr.type; 417fd2d3dbSJunchao Zhang #endif 427fd2d3dbSJunchao Zhang if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 437fd2d3dbSJunchao Zhang } 447fd2d3dbSJunchao Zhang #endif 457fd2d3dbSJunchao Zhang 467fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP) 477fd2d3dbSJunchao Zhang if (PetscHIPInitialized && data) { 487fd2d3dbSJunchao Zhang hipError_t cerr; 497fd2d3dbSJunchao Zhang struct hipPointerAttribute_t attr; 507fd2d3dbSJunchao Zhang enum hipMemoryType mtype; 5159af0bd3SScott Kruger cerr = hipPointerGetAttributes(&attr,data); 527fd2d3dbSJunchao Zhang hipGetLastError(); /* Reset the last error */ 537fd2d3dbSJunchao Zhang mtype = attr.memoryType; 547fd2d3dbSJunchao Zhang if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 557fd2d3dbSJunchao Zhang } 567fd2d3dbSJunchao Zhang #endif 577fd2d3dbSJunchao Zhang PetscFunctionReturn(0); 587fd2d3dbSJunchao Zhang } 597fd2d3dbSJunchao Zhang 608af6ec1cSBarry Smith /*@ 6195fce210SBarry Smith PetscSFCreate - create a star forest communication context 6295fce210SBarry Smith 63d083f849SBarry Smith Collective 6495fce210SBarry Smith 654165533cSJose E. Roman Input Parameter: 6695fce210SBarry Smith . comm - communicator on which the star forest will operate 6795fce210SBarry Smith 684165533cSJose E. Roman Output Parameter: 6995fce210SBarry Smith . sf - new star forest context 7095fce210SBarry Smith 71dd5b3ca6SJunchao Zhang Options Database Keys: 72dd5b3ca6SJunchao Zhang + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 73dd5b3ca6SJunchao Zhang . -sf_type window -Use MPI-3 one-sided window for communication 74dd5b3ca6SJunchao Zhang - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 75dd5b3ca6SJunchao Zhang 7695fce210SBarry Smith Level: intermediate 7795fce210SBarry Smith 78dd5b3ca6SJunchao Zhang Notes: 79dd5b3ca6SJunchao Zhang When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 80dd5b3ca6SJunchao Zhang MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 81dd5b3ca6SJunchao Zhang SFs are optimized and they have better performance than general SFs. 82dd5b3ca6SJunchao Zhang 83dd5b3ca6SJunchao Zhang .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 8495fce210SBarry Smith @*/ 8595fce210SBarry Smith PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 8695fce210SBarry Smith { 8795fce210SBarry Smith PetscErrorCode ierr; 8895fce210SBarry Smith PetscSF b; 8995fce210SBarry Smith 9095fce210SBarry Smith PetscFunctionBegin; 9195fce210SBarry Smith PetscValidPointer(sf,2); 92607a6623SBarry Smith ierr = PetscSFInitializePackage();CHKERRQ(ierr); 9395fce210SBarry Smith 9473107ff1SLisandro Dalcin ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 9595fce210SBarry Smith 9695fce210SBarry Smith b->nroots = -1; 9795fce210SBarry Smith b->nleaves = -1; 9829046d53SLisandro Dalcin b->minleaf = PETSC_MAX_INT; 9929046d53SLisandro Dalcin b->maxleaf = PETSC_MIN_INT; 10095fce210SBarry Smith b->nranks = -1; 10195fce210SBarry Smith b->rankorder = PETSC_TRUE; 10295fce210SBarry Smith b->ingroup = MPI_GROUP_NULL; 10395fce210SBarry Smith b->outgroup = MPI_GROUP_NULL; 10495fce210SBarry Smith b->graphset = PETSC_FALSE; 10520c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 10620c24465SJunchao Zhang b->use_gpu_aware_mpi = use_gpu_aware_mpi; 10720c24465SJunchao Zhang b->use_stream_aware_mpi = PETSC_FALSE; 10871438e86SJunchao Zhang b->unknown_input_stream= PETSC_FALSE; 10927f636e8SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 11020c24465SJunchao Zhang b->backend = PETSCSF_BACKEND_KOKKOS; 11127f636e8SJunchao Zhang #elif defined(PETSC_HAVE_CUDA) 11227f636e8SJunchao Zhang b->backend = PETSCSF_BACKEND_CUDA; 11359af0bd3SScott Kruger #elif defined(PETSC_HAVE_HIP) 11459af0bd3SScott Kruger b->backend = PETSCSF_BACKEND_HIP; 11520c24465SJunchao Zhang #endif 11671438e86SJunchao Zhang 11771438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 11871438e86SJunchao Zhang b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 11971438e86SJunchao Zhang b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 12071438e86SJunchao Zhang ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr); 12171438e86SJunchao Zhang ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr); 12271438e86SJunchao Zhang #endif 12320c24465SJunchao Zhang #endif 12460c22052SBarry Smith b->vscat.from_n = -1; 12560c22052SBarry Smith b->vscat.to_n = -1; 12660c22052SBarry Smith b->vscat.unit = MPIU_SCALAR; 12795fce210SBarry Smith *sf = b; 12895fce210SBarry Smith PetscFunctionReturn(0); 12995fce210SBarry Smith } 13095fce210SBarry Smith 13129046d53SLisandro Dalcin /*@ 13295fce210SBarry Smith PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 13395fce210SBarry Smith 13495fce210SBarry Smith Collective 13595fce210SBarry Smith 1364165533cSJose E. Roman Input Parameter: 13795fce210SBarry Smith . sf - star forest 13895fce210SBarry Smith 13995fce210SBarry Smith Level: advanced 14095fce210SBarry Smith 14195fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 14295fce210SBarry Smith @*/ 14395fce210SBarry Smith PetscErrorCode PetscSFReset(PetscSF sf) 14495fce210SBarry Smith { 14595fce210SBarry Smith PetscErrorCode ierr; 14695fce210SBarry Smith 14795fce210SBarry Smith PetscFunctionBegin; 14895fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 14979715d56SJed Brown if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 15029046d53SLisandro Dalcin sf->nroots = -1; 15129046d53SLisandro Dalcin sf->nleaves = -1; 15229046d53SLisandro Dalcin sf->minleaf = PETSC_MAX_INT; 15329046d53SLisandro Dalcin sf->maxleaf = PETSC_MIN_INT; 15495fce210SBarry Smith sf->mine = NULL; 15595fce210SBarry Smith sf->remote = NULL; 15629046d53SLisandro Dalcin sf->graphset = PETSC_FALSE; 15729046d53SLisandro Dalcin ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 15895fce210SBarry Smith ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 15921c688dcSJed Brown sf->nranks = -1; 16029046d53SLisandro Dalcin ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 16129046d53SLisandro Dalcin sf->degreeknown = PETSC_FALSE; 16295fce210SBarry Smith ierr = PetscFree(sf->degree);CHKERRQ(ierr); 163ffc4695bSBarry Smith if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 164ffc4695bSBarry Smith if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 165013b3241SStefano Zampini if (sf->multi) sf->multi->multi = NULL; 16695fce210SBarry Smith ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 167dd5b3ca6SJunchao Zhang ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 16871438e86SJunchao Zhang 16971438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 17071438e86SJunchao Zhang for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 17171438e86SJunchao Zhang #endif 17271438e86SJunchao Zhang 17395fce210SBarry Smith sf->setupcalled = PETSC_FALSE; 17495fce210SBarry Smith PetscFunctionReturn(0); 17595fce210SBarry Smith } 17695fce210SBarry Smith 17795fce210SBarry Smith /*@C 17829046d53SLisandro Dalcin PetscSFSetType - Set the PetscSF communication implementation 17995fce210SBarry Smith 18095fce210SBarry Smith Collective on PetscSF 18195fce210SBarry Smith 18295fce210SBarry Smith Input Parameters: 18395fce210SBarry Smith + sf - the PetscSF context 18495fce210SBarry Smith - type - a known method 18595fce210SBarry Smith 18695fce210SBarry Smith Options Database Key: 18795fce210SBarry Smith . -sf_type <type> - Sets the method; use -help for a list 18870616304SStefano Zampini of available methods (for instance, window, basic, neighbor) 18995fce210SBarry Smith 19095fce210SBarry Smith Notes: 19195fce210SBarry Smith See "include/petscsf.h" for available methods (for instance) 19295fce210SBarry Smith + PETSCSFWINDOW - MPI-2/3 one-sided 19395fce210SBarry Smith - PETSCSFBASIC - basic implementation using MPI-1 two-sided 19495fce210SBarry Smith 19595fce210SBarry Smith Level: intermediate 19695fce210SBarry Smith 19795fce210SBarry Smith .seealso: PetscSFType, PetscSFCreate() 19895fce210SBarry Smith @*/ 19995fce210SBarry Smith PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 20095fce210SBarry Smith { 20195fce210SBarry Smith PetscErrorCode ierr,(*r)(PetscSF); 20295fce210SBarry Smith PetscBool match; 20395fce210SBarry Smith 20495fce210SBarry Smith PetscFunctionBegin; 20595fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 20695fce210SBarry Smith PetscValidCharPointer(type,2); 20795fce210SBarry Smith 20895fce210SBarry Smith ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 20995fce210SBarry Smith if (match) PetscFunctionReturn(0); 21095fce210SBarry Smith 211adc40e5bSBarry Smith ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 21295fce210SBarry Smith if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 21329046d53SLisandro Dalcin /* Destroy the previous PetscSF implementation context */ 21429046d53SLisandro Dalcin if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 21595fce210SBarry Smith ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 21695fce210SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 21795fce210SBarry Smith ierr = (*r)(sf);CHKERRQ(ierr); 21895fce210SBarry Smith PetscFunctionReturn(0); 21995fce210SBarry Smith } 22095fce210SBarry Smith 22129046d53SLisandro Dalcin /*@C 22229046d53SLisandro Dalcin PetscSFGetType - Get the PetscSF communication implementation 22329046d53SLisandro Dalcin 22429046d53SLisandro Dalcin Not Collective 22529046d53SLisandro Dalcin 22629046d53SLisandro Dalcin Input Parameter: 22729046d53SLisandro Dalcin . sf - the PetscSF context 22829046d53SLisandro Dalcin 22929046d53SLisandro Dalcin Output Parameter: 23029046d53SLisandro Dalcin . type - the PetscSF type name 23129046d53SLisandro Dalcin 23229046d53SLisandro Dalcin Level: intermediate 23329046d53SLisandro Dalcin 23429046d53SLisandro Dalcin .seealso: PetscSFSetType(), PetscSFCreate() 23529046d53SLisandro Dalcin @*/ 23629046d53SLisandro Dalcin PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 23729046d53SLisandro Dalcin { 23829046d53SLisandro Dalcin PetscFunctionBegin; 23929046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 24029046d53SLisandro Dalcin PetscValidPointer(type,2); 24129046d53SLisandro Dalcin *type = ((PetscObject)sf)->type_name; 24229046d53SLisandro Dalcin PetscFunctionReturn(0); 24329046d53SLisandro Dalcin } 24429046d53SLisandro Dalcin 2451fb7b255SJunchao Zhang /*@C 24695fce210SBarry Smith PetscSFDestroy - destroy star forest 24795fce210SBarry Smith 24895fce210SBarry Smith Collective 24995fce210SBarry Smith 2504165533cSJose E. Roman Input Parameter: 25195fce210SBarry Smith . sf - address of star forest 25295fce210SBarry Smith 25395fce210SBarry Smith Level: intermediate 25495fce210SBarry Smith 25595fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFReset() 25695fce210SBarry Smith @*/ 25795fce210SBarry Smith PetscErrorCode PetscSFDestroy(PetscSF *sf) 25895fce210SBarry Smith { 25995fce210SBarry Smith PetscErrorCode ierr; 26095fce210SBarry Smith 26195fce210SBarry Smith PetscFunctionBegin; 26295fce210SBarry Smith if (!*sf) PetscFunctionReturn(0); 26395fce210SBarry Smith PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 26429046d53SLisandro Dalcin if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 26595fce210SBarry Smith ierr = PetscSFReset(*sf);CHKERRQ(ierr); 26695fce210SBarry Smith if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 26797929ea7SJunchao Zhang ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 26855b25c41SPierre Jolivet if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 26995fce210SBarry Smith ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 27095fce210SBarry Smith PetscFunctionReturn(0); 27195fce210SBarry Smith } 27295fce210SBarry Smith 273c4e6a40aSLawrence Mitchell static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 274c4e6a40aSLawrence Mitchell { 275c4e6a40aSLawrence Mitchell PetscInt i, nleaves; 276c4e6a40aSLawrence Mitchell PetscMPIInt size; 277c4e6a40aSLawrence Mitchell const PetscInt *ilocal; 278c4e6a40aSLawrence Mitchell const PetscSFNode *iremote; 279c4e6a40aSLawrence Mitchell PetscErrorCode ierr; 280c4e6a40aSLawrence Mitchell 281c4e6a40aSLawrence Mitchell PetscFunctionBegin; 28276bd3646SJed Brown if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 283c4e6a40aSLawrence Mitchell ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 284ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 285c4e6a40aSLawrence Mitchell for (i = 0; i < nleaves; i++) { 286c4e6a40aSLawrence Mitchell const PetscInt rank = iremote[i].rank; 287c4e6a40aSLawrence Mitchell const PetscInt remote = iremote[i].index; 288c4e6a40aSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 289c4e6a40aSLawrence Mitchell if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 290c4e6a40aSLawrence Mitchell if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 291c4e6a40aSLawrence Mitchell if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 292c4e6a40aSLawrence Mitchell } 293c4e6a40aSLawrence Mitchell PetscFunctionReturn(0); 294c4e6a40aSLawrence Mitchell } 295c4e6a40aSLawrence Mitchell 29695fce210SBarry Smith /*@ 29795fce210SBarry Smith PetscSFSetUp - set up communication structures 29895fce210SBarry Smith 29995fce210SBarry Smith Collective 30095fce210SBarry Smith 3014165533cSJose E. Roman Input Parameter: 30295fce210SBarry Smith . sf - star forest communication object 30395fce210SBarry Smith 30495fce210SBarry Smith Level: beginner 30595fce210SBarry Smith 30695fce210SBarry Smith .seealso: PetscSFSetFromOptions(), PetscSFSetType() 30795fce210SBarry Smith @*/ 30895fce210SBarry Smith PetscErrorCode PetscSFSetUp(PetscSF sf) 30995fce210SBarry Smith { 31095fce210SBarry Smith PetscErrorCode ierr; 31195fce210SBarry Smith 31295fce210SBarry Smith PetscFunctionBegin; 31329046d53SLisandro Dalcin PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 31429046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 31595fce210SBarry Smith if (sf->setupcalled) PetscFunctionReturn(0); 31629046d53SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 31720c24465SJunchao Zhang ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 31820c24465SJunchao Zhang if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 31995fce210SBarry Smith if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 32020c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 32120c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_CUDA) { 32271438e86SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_CUDA; 32371438e86SJunchao Zhang sf->ops->Free = PetscSFFree_CUDA; 32420c24465SJunchao Zhang } 32520c24465SJunchao Zhang #endif 32659af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 32759af0bd3SScott Kruger if (sf->backend == PETSCSF_BACKEND_HIP) { 32859af0bd3SScott Kruger sf->ops->Malloc = PetscSFMalloc_HIP; 32959af0bd3SScott Kruger sf->ops->Free = PetscSFFree_HIP; 33059af0bd3SScott Kruger } 33159af0bd3SScott Kruger #endif 33220c24465SJunchao Zhang 33359af0bd3SScott Kruger # 33420c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 33520c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 33620c24465SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_Kokkos; 33720c24465SJunchao Zhang sf->ops->Free = PetscSFFree_Kokkos; 33820c24465SJunchao Zhang } 33920c24465SJunchao Zhang #endif 34029046d53SLisandro Dalcin ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 34195fce210SBarry Smith sf->setupcalled = PETSC_TRUE; 34295fce210SBarry Smith PetscFunctionReturn(0); 34395fce210SBarry Smith } 34495fce210SBarry Smith 3458af6ec1cSBarry Smith /*@ 34695fce210SBarry Smith PetscSFSetFromOptions - set PetscSF options using the options database 34795fce210SBarry Smith 34895fce210SBarry Smith Logically Collective 34995fce210SBarry Smith 3504165533cSJose E. Roman Input Parameter: 35195fce210SBarry Smith . sf - star forest 35295fce210SBarry Smith 35395fce210SBarry Smith Options Database Keys: 35460263706SJed Brown + -sf_type - implementation type, see PetscSFSetType() 35551ccb202SJunchao Zhang . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 356b85e67b7SJunchao Zhang . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 357c2a741eeSJunchao Zhang use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 358c06a8e02SRichard Tran Mills If true, this option only works with -use_gpu_aware_mpi 1. 35920c24465SJunchao Zhang . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 360c06a8e02SRichard Tran Mills If true, this option only works with -use_gpu_aware_mpi 1. 36195fce210SBarry Smith 36259af0bd3SScott Kruger - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 36359af0bd3SScott Kruger On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 36420c24465SJunchao Zhang the only available is kokkos. 36520c24465SJunchao Zhang 36695fce210SBarry Smith Level: intermediate 36795fce210SBarry Smith @*/ 36895fce210SBarry Smith PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 36995fce210SBarry Smith { 37095fce210SBarry Smith PetscSFType deft; 37195fce210SBarry Smith char type[256]; 37295fce210SBarry Smith PetscErrorCode ierr; 37395fce210SBarry Smith PetscBool flg; 37495fce210SBarry Smith 37595fce210SBarry Smith PetscFunctionBegin; 37695fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 37795fce210SBarry Smith ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 37895fce210SBarry Smith deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 37929046d53SLisandro Dalcin ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 38095fce210SBarry Smith ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr); 38195fce210SBarry Smith ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 3827fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 38320c24465SJunchao Zhang { 38420c24465SJunchao Zhang char backendstr[32] = {0}; 38559af0bd3SScott Kruger PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 38620c24465SJunchao Zhang /* Change the defaults set in PetscSFCreate() with command line options */ 38771438e86SJunchao Zhang ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr); 388b85e67b7SJunchao Zhang ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 38920c24465SJunchao Zhang ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 39020c24465SJunchao Zhang ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 39120c24465SJunchao Zhang ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 39259af0bd3SScott Kruger ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 39359af0bd3SScott Kruger #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 39420c24465SJunchao Zhang if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 39520c24465SJunchao Zhang else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 39659af0bd3SScott Kruger else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 39759af0bd3SScott Kruger else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 39820c24465SJunchao Zhang #elif defined(PETSC_HAVE_KOKKOS) 39920c24465SJunchao Zhang if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr); 40020c24465SJunchao Zhang #endif 40120c24465SJunchao Zhang } 402c2a741eeSJunchao Zhang #endif 403e55864a3SBarry Smith if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 40495fce210SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 40595fce210SBarry Smith PetscFunctionReturn(0); 40695fce210SBarry Smith } 40795fce210SBarry Smith 40829046d53SLisandro Dalcin /*@ 40995fce210SBarry Smith PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 41095fce210SBarry Smith 41195fce210SBarry Smith Logically Collective 41295fce210SBarry Smith 4134165533cSJose E. Roman Input Parameters: 41495fce210SBarry Smith + sf - star forest 41595fce210SBarry Smith - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 41695fce210SBarry Smith 41795fce210SBarry Smith Level: advanced 41895fce210SBarry Smith 41995fce210SBarry Smith .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 42095fce210SBarry Smith @*/ 42195fce210SBarry Smith PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 42295fce210SBarry Smith { 42395fce210SBarry Smith PetscFunctionBegin; 42495fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 42595fce210SBarry Smith PetscValidLogicalCollectiveBool(sf,flg,2); 42695fce210SBarry Smith if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 42795fce210SBarry Smith sf->rankorder = flg; 42895fce210SBarry Smith PetscFunctionReturn(0); 42995fce210SBarry Smith } 43095fce210SBarry Smith 4318af6ec1cSBarry Smith /*@ 43295fce210SBarry Smith PetscSFSetGraph - Set a parallel star forest 43395fce210SBarry Smith 43495fce210SBarry Smith Collective 43595fce210SBarry Smith 4364165533cSJose E. Roman Input Parameters: 43795fce210SBarry Smith + sf - star forest 43895fce210SBarry Smith . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 43995fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 440c4e6a40aSLawrence Mitchell . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 441c4e6a40aSLawrence Mitchell during setup in debug mode) 44295fce210SBarry Smith . localmode - copy mode for ilocal 443c4e6a40aSLawrence Mitchell . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 444c4e6a40aSLawrence Mitchell during setup in debug mode) 44595fce210SBarry Smith - remotemode - copy mode for iremote 44695fce210SBarry Smith 44795fce210SBarry Smith Level: intermediate 44895fce210SBarry Smith 44995452b02SPatrick Sanan Notes: 45095452b02SPatrick Sanan In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 45138ab3f8aSBarry Smith 4522ad1e87fSLisandro Dalcin Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 4532ad1e87fSLisandro Dalcin encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 4542ad1e87fSLisandro Dalcin needed 4552ad1e87fSLisandro Dalcin 456c4e6a40aSLawrence Mitchell Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 457c4e6a40aSLawrence Mitchell indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 458c4e6a40aSLawrence Mitchell 45995fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 46095fce210SBarry Smith @*/ 46195fce210SBarry Smith PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 46295fce210SBarry Smith { 46395fce210SBarry Smith PetscErrorCode ierr; 46495fce210SBarry Smith 46595fce210SBarry Smith PetscFunctionBegin; 46695fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 46729046d53SLisandro Dalcin if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 46829046d53SLisandro Dalcin if (nleaves > 0) PetscValidPointer(iremote,6); 46929046d53SLisandro Dalcin if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 47095fce210SBarry Smith if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 47129046d53SLisandro Dalcin 4722a67d2daSStefano Zampini if (sf->nroots >= 0) { /* Reset only if graph already set */ 47395fce210SBarry Smith ierr = PetscSFReset(sf);CHKERRQ(ierr); 4742a67d2daSStefano Zampini } 4752a67d2daSStefano Zampini 47629046d53SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 47729046d53SLisandro Dalcin 47895fce210SBarry Smith sf->nroots = nroots; 47995fce210SBarry Smith sf->nleaves = nleaves; 48029046d53SLisandro Dalcin 48129046d53SLisandro Dalcin if (nleaves && ilocal) { 48221c688dcSJed Brown PetscInt i; 48329046d53SLisandro Dalcin PetscInt minleaf = PETSC_MAX_INT; 48429046d53SLisandro Dalcin PetscInt maxleaf = PETSC_MIN_INT; 4852ad1e87fSLisandro Dalcin int contiguous = 1; 48629046d53SLisandro Dalcin for (i=0; i<nleaves; i++) { 48729046d53SLisandro Dalcin minleaf = PetscMin(minleaf,ilocal[i]); 48829046d53SLisandro Dalcin maxleaf = PetscMax(maxleaf,ilocal[i]); 4892ad1e87fSLisandro Dalcin contiguous &= (ilocal[i] == i); 49029046d53SLisandro Dalcin } 49129046d53SLisandro Dalcin sf->minleaf = minleaf; 49229046d53SLisandro Dalcin sf->maxleaf = maxleaf; 4932ad1e87fSLisandro Dalcin if (contiguous) { 4942ad1e87fSLisandro Dalcin if (localmode == PETSC_OWN_POINTER) { 4952ad1e87fSLisandro Dalcin ierr = PetscFree(ilocal);CHKERRQ(ierr); 4962ad1e87fSLisandro Dalcin } 4972ad1e87fSLisandro Dalcin ilocal = NULL; 4982ad1e87fSLisandro Dalcin } 49929046d53SLisandro Dalcin } else { 50029046d53SLisandro Dalcin sf->minleaf = 0; 50129046d53SLisandro Dalcin sf->maxleaf = nleaves - 1; 50229046d53SLisandro Dalcin } 50329046d53SLisandro Dalcin 50429046d53SLisandro Dalcin if (ilocal) { 50595fce210SBarry Smith switch (localmode) { 50695fce210SBarry Smith case PETSC_COPY_VALUES: 507785e854fSJed Brown ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 508580bdb30SBarry Smith ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 50995fce210SBarry Smith sf->mine = sf->mine_alloc; 51095fce210SBarry Smith break; 51195fce210SBarry Smith case PETSC_OWN_POINTER: 51295fce210SBarry Smith sf->mine_alloc = (PetscInt*)ilocal; 51395fce210SBarry Smith sf->mine = sf->mine_alloc; 51495fce210SBarry Smith break; 51595fce210SBarry Smith case PETSC_USE_POINTER: 51629046d53SLisandro Dalcin sf->mine_alloc = NULL; 51795fce210SBarry Smith sf->mine = (PetscInt*)ilocal; 51895fce210SBarry Smith break; 51995fce210SBarry Smith default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 52095fce210SBarry Smith } 52195fce210SBarry Smith } 52229046d53SLisandro Dalcin 52395fce210SBarry Smith switch (remotemode) { 52495fce210SBarry Smith case PETSC_COPY_VALUES: 525785e854fSJed Brown ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 526580bdb30SBarry Smith ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 52795fce210SBarry Smith sf->remote = sf->remote_alloc; 52895fce210SBarry Smith break; 52995fce210SBarry Smith case PETSC_OWN_POINTER: 53095fce210SBarry Smith sf->remote_alloc = (PetscSFNode*)iremote; 53195fce210SBarry Smith sf->remote = sf->remote_alloc; 53295fce210SBarry Smith break; 53395fce210SBarry Smith case PETSC_USE_POINTER: 53429046d53SLisandro Dalcin sf->remote_alloc = NULL; 53595fce210SBarry Smith sf->remote = (PetscSFNode*)iremote; 53695fce210SBarry Smith break; 53795fce210SBarry Smith default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 53895fce210SBarry Smith } 53995fce210SBarry Smith 540563ffbabSMatthew G. Knepley ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 54129046d53SLisandro Dalcin sf->graphset = PETSC_TRUE; 54295fce210SBarry Smith PetscFunctionReturn(0); 54395fce210SBarry Smith } 54495fce210SBarry Smith 54529046d53SLisandro Dalcin /*@ 546dd5b3ca6SJunchao Zhang PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 547dd5b3ca6SJunchao Zhang 548dd5b3ca6SJunchao Zhang Collective 549dd5b3ca6SJunchao Zhang 550dd5b3ca6SJunchao Zhang Input Parameters: 551dd5b3ca6SJunchao Zhang + sf - The PetscSF 552dd5b3ca6SJunchao Zhang . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 553dd5b3ca6SJunchao Zhang - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 554dd5b3ca6SJunchao Zhang 555dd5b3ca6SJunchao Zhang Notes: 556dd5b3ca6SJunchao Zhang It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 557dd5b3ca6SJunchao Zhang n and N are local and global sizes of x respectively. 558dd5b3ca6SJunchao Zhang 559dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 560dd5b3ca6SJunchao Zhang sequential vectors y on all ranks. 561dd5b3ca6SJunchao Zhang 562dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 563dd5b3ca6SJunchao Zhang sequential vector y on rank 0. 564dd5b3ca6SJunchao Zhang 565dd5b3ca6SJunchao Zhang In above cases, entries of x are roots and entries of y are leaves. 566dd5b3ca6SJunchao Zhang 567dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 568dd5b3ca6SJunchao Zhang creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 569dd5b3ca6SJunchao Zhang of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 570dd5b3ca6SJunchao Zhang not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 571dd5b3ca6SJunchao Zhang items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 572dd5b3ca6SJunchao Zhang 573dd5b3ca6SJunchao Zhang In this case, roots and leaves are symmetric. 574dd5b3ca6SJunchao Zhang 575dd5b3ca6SJunchao Zhang Level: intermediate 576dd5b3ca6SJunchao Zhang @*/ 577dd5b3ca6SJunchao Zhang PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 578dd5b3ca6SJunchao Zhang { 579dd5b3ca6SJunchao Zhang MPI_Comm comm; 580dd5b3ca6SJunchao Zhang PetscInt n,N,res[2]; 581dd5b3ca6SJunchao Zhang PetscMPIInt rank,size; 582dd5b3ca6SJunchao Zhang PetscSFType type; 583dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 584dd5b3ca6SJunchao Zhang 585dd5b3ca6SJunchao Zhang PetscFunctionBegin; 586dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 587dd5b3ca6SJunchao Zhang if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 588ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 589ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 590dd5b3ca6SJunchao Zhang 591dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLTOALL) { 592dd5b3ca6SJunchao Zhang type = PETSCSFALLTOALL; 593dd5b3ca6SJunchao Zhang ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 594dd5b3ca6SJunchao Zhang ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 595dd5b3ca6SJunchao Zhang ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 596dd5b3ca6SJunchao Zhang ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 597dd5b3ca6SJunchao Zhang } else { 598dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 599dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 600dd5b3ca6SJunchao Zhang res[0] = n; 601dd5b3ca6SJunchao Zhang res[1] = -n; 602dd5b3ca6SJunchao Zhang /* Check if n are same over all ranks so that we can optimize it */ 603820f2d46SBarry Smith ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr); 604dd5b3ca6SJunchao Zhang if (res[0] == -res[1]) { /* same n */ 605dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 606dd5b3ca6SJunchao Zhang } else { 607dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 608dd5b3ca6SJunchao Zhang } 609dd5b3ca6SJunchao Zhang ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 610dd5b3ca6SJunchao Zhang } 611dd5b3ca6SJunchao Zhang ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 612dd5b3ca6SJunchao Zhang 613dd5b3ca6SJunchao Zhang sf->pattern = pattern; 614dd5b3ca6SJunchao Zhang sf->mine = NULL; /* Contiguous */ 615dd5b3ca6SJunchao Zhang 616dd5b3ca6SJunchao Zhang /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 617dd5b3ca6SJunchao Zhang Also set other easy stuff. 618dd5b3ca6SJunchao Zhang */ 619dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLGATHER) { 620dd5b3ca6SJunchao Zhang sf->nleaves = N; 621dd5b3ca6SJunchao Zhang sf->nroots = n; 622dd5b3ca6SJunchao Zhang sf->nranks = size; 623dd5b3ca6SJunchao Zhang sf->minleaf = 0; 624dd5b3ca6SJunchao Zhang sf->maxleaf = N - 1; 625dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_GATHER) { 626dd5b3ca6SJunchao Zhang sf->nleaves = rank ? 0 : N; 627dd5b3ca6SJunchao Zhang sf->nroots = n; 628dd5b3ca6SJunchao Zhang sf->nranks = rank ? 0 : size; 629dd5b3ca6SJunchao Zhang sf->minleaf = 0; 630dd5b3ca6SJunchao Zhang sf->maxleaf = rank ? -1 : N - 1; 631dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 632dd5b3ca6SJunchao Zhang sf->nleaves = size; 633dd5b3ca6SJunchao Zhang sf->nroots = size; 634dd5b3ca6SJunchao Zhang sf->nranks = size; 635dd5b3ca6SJunchao Zhang sf->minleaf = 0; 636dd5b3ca6SJunchao Zhang sf->maxleaf = size - 1; 637dd5b3ca6SJunchao Zhang } 638dd5b3ca6SJunchao Zhang sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 639dd5b3ca6SJunchao Zhang sf->graphset = PETSC_TRUE; 640dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 641dd5b3ca6SJunchao Zhang } 642dd5b3ca6SJunchao Zhang 643dd5b3ca6SJunchao Zhang /*@ 64495fce210SBarry Smith PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 64595fce210SBarry Smith 64695fce210SBarry Smith Collective 64795fce210SBarry Smith 6484165533cSJose E. Roman Input Parameter: 64995fce210SBarry Smith . sf - star forest to invert 65095fce210SBarry Smith 6514165533cSJose E. Roman Output Parameter: 65295fce210SBarry Smith . isf - inverse of sf 6534165533cSJose E. Roman 65495fce210SBarry Smith Level: advanced 65595fce210SBarry Smith 65695fce210SBarry Smith Notes: 65795fce210SBarry Smith All roots must have degree 1. 65895fce210SBarry Smith 65995fce210SBarry Smith The local space may be a permutation, but cannot be sparse. 66095fce210SBarry Smith 66195fce210SBarry Smith .seealso: PetscSFSetGraph() 66295fce210SBarry Smith @*/ 66395fce210SBarry Smith PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 66495fce210SBarry Smith { 66595fce210SBarry Smith PetscErrorCode ierr; 66695fce210SBarry Smith PetscMPIInt rank; 66795fce210SBarry Smith PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 66895fce210SBarry Smith const PetscInt *ilocal; 66995fce210SBarry Smith PetscSFNode *roots,*leaves; 67095fce210SBarry Smith 67195fce210SBarry Smith PetscFunctionBegin; 67229046d53SLisandro Dalcin PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 67329046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 67429046d53SLisandro Dalcin PetscValidPointer(isf,2); 67529046d53SLisandro Dalcin 67695fce210SBarry Smith ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 67729046d53SLisandro Dalcin maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 67829046d53SLisandro Dalcin 679ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 680ae9aee6dSMatthew G. Knepley ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 681ae9aee6dSMatthew G. Knepley for (i=0; i<maxlocal; i++) { 68295fce210SBarry Smith leaves[i].rank = rank; 68395fce210SBarry Smith leaves[i].index = i; 68495fce210SBarry Smith } 68595fce210SBarry Smith for (i=0; i <nroots; i++) { 68695fce210SBarry Smith roots[i].rank = -1; 68795fce210SBarry Smith roots[i].index = -1; 68895fce210SBarry Smith } 68983df288dSJunchao Zhang ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 69083df288dSJunchao Zhang ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 69195fce210SBarry Smith 69295fce210SBarry Smith /* Check whether our leaves are sparse */ 69395fce210SBarry Smith for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 69495fce210SBarry Smith if (count == nroots) newilocal = NULL; 69595fce210SBarry Smith else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ 696785e854fSJed Brown ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 69795fce210SBarry Smith for (i=0,count=0; i<nroots; i++) { 69895fce210SBarry Smith if (roots[i].rank >= 0) { 69995fce210SBarry Smith newilocal[count] = i; 70095fce210SBarry Smith roots[count].rank = roots[i].rank; 70195fce210SBarry Smith roots[count].index = roots[i].index; 70295fce210SBarry Smith count++; 70395fce210SBarry Smith } 70495fce210SBarry Smith } 70595fce210SBarry Smith } 70695fce210SBarry Smith 70795fce210SBarry Smith ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 70895fce210SBarry Smith ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 70995fce210SBarry Smith ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 71095fce210SBarry Smith PetscFunctionReturn(0); 71195fce210SBarry Smith } 71295fce210SBarry Smith 71395fce210SBarry Smith /*@ 71495fce210SBarry Smith PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 71595fce210SBarry Smith 71695fce210SBarry Smith Collective 71795fce210SBarry Smith 7184165533cSJose E. Roman Input Parameters: 71995fce210SBarry Smith + sf - communication object to duplicate 72095fce210SBarry Smith - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 72195fce210SBarry Smith 7224165533cSJose E. Roman Output Parameter: 72395fce210SBarry Smith . newsf - new communication object 72495fce210SBarry Smith 72595fce210SBarry Smith Level: beginner 72695fce210SBarry Smith 72795fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 72895fce210SBarry Smith @*/ 72995fce210SBarry Smith PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 73095fce210SBarry Smith { 73129046d53SLisandro Dalcin PetscSFType type; 73295fce210SBarry Smith PetscErrorCode ierr; 73397929ea7SJunchao Zhang MPI_Datatype dtype=MPIU_SCALAR; 73495fce210SBarry Smith 73595fce210SBarry Smith PetscFunctionBegin; 73629046d53SLisandro Dalcin PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 73729046d53SLisandro Dalcin PetscValidLogicalCollectiveEnum(sf,opt,2); 73829046d53SLisandro Dalcin PetscValidPointer(newsf,3); 73995fce210SBarry Smith ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 74029046d53SLisandro Dalcin ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 74129046d53SLisandro Dalcin if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 74295fce210SBarry Smith if (opt == PETSCSF_DUPLICATE_GRAPH) { 743dd5b3ca6SJunchao Zhang PetscSFCheckGraphSet(sf,1); 744dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 74595fce210SBarry Smith PetscInt nroots,nleaves; 74695fce210SBarry Smith const PetscInt *ilocal; 74795fce210SBarry Smith const PetscSFNode *iremote; 74895fce210SBarry Smith ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 74995fce210SBarry Smith ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 750dd5b3ca6SJunchao Zhang } else { 751dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 752dd5b3ca6SJunchao Zhang } 75395fce210SBarry Smith } 75497929ea7SJunchao Zhang /* Since oldtype is committed, so is newtype, according to MPI */ 75555b25c41SPierre Jolivet if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);} 75697929ea7SJunchao Zhang (*newsf)->vscat.bs = sf->vscat.bs; 75797929ea7SJunchao Zhang (*newsf)->vscat.unit = dtype; 75897929ea7SJunchao Zhang (*newsf)->vscat.to_n = sf->vscat.to_n; 75997929ea7SJunchao Zhang (*newsf)->vscat.from_n = sf->vscat.from_n; 76097929ea7SJunchao Zhang /* Do not copy lsf. Build it on demand since it is rarely used */ 76197929ea7SJunchao Zhang 76220c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 76320c24465SJunchao Zhang (*newsf)->backend = sf->backend; 76471438e86SJunchao Zhang (*newsf)->unknown_input_stream= sf->unknown_input_stream; 76520c24465SJunchao Zhang (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 76620c24465SJunchao Zhang (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 76720c24465SJunchao Zhang #endif 76829046d53SLisandro Dalcin if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 76920c24465SJunchao Zhang /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 77095fce210SBarry Smith PetscFunctionReturn(0); 77195fce210SBarry Smith } 77295fce210SBarry Smith 77395fce210SBarry Smith /*@C 77495fce210SBarry Smith PetscSFGetGraph - Get the graph specifying a parallel star forest 77595fce210SBarry Smith 77695fce210SBarry Smith Not Collective 77795fce210SBarry Smith 7784165533cSJose E. Roman Input Parameter: 77995fce210SBarry Smith . sf - star forest 78095fce210SBarry Smith 7814165533cSJose E. Roman Output Parameters: 78295fce210SBarry Smith + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 78395fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 784bc6585dcSJunchao Zhang . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 78595fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process 78695fce210SBarry Smith 787373e0d91SLisandro Dalcin Notes: 788373e0d91SLisandro Dalcin We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 789373e0d91SLisandro Dalcin 790245d9833Sprj- When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 791ca797d7aSLawrence Mitchell want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 792ca797d7aSLawrence Mitchell 79395fce210SBarry Smith Level: intermediate 79495fce210SBarry Smith 79595fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 79695fce210SBarry Smith @*/ 79795fce210SBarry Smith PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 79895fce210SBarry Smith { 799b8dee149SJunchao Zhang PetscErrorCode ierr; 80095fce210SBarry Smith 80195fce210SBarry Smith PetscFunctionBegin; 80295fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 803b8dee149SJunchao Zhang if (sf->ops->GetGraph) { 804b8dee149SJunchao Zhang ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 805b8dee149SJunchao Zhang } else { 80695fce210SBarry Smith if (nroots) *nroots = sf->nroots; 80795fce210SBarry Smith if (nleaves) *nleaves = sf->nleaves; 80895fce210SBarry Smith if (ilocal) *ilocal = sf->mine; 80995fce210SBarry Smith if (iremote) *iremote = sf->remote; 810b8dee149SJunchao Zhang } 81195fce210SBarry Smith PetscFunctionReturn(0); 81295fce210SBarry Smith } 81395fce210SBarry Smith 81429046d53SLisandro Dalcin /*@ 81595fce210SBarry Smith PetscSFGetLeafRange - Get the active leaf ranges 81695fce210SBarry Smith 81795fce210SBarry Smith Not Collective 81895fce210SBarry Smith 8194165533cSJose E. Roman Input Parameter: 82095fce210SBarry Smith . sf - star forest 82195fce210SBarry Smith 8224165533cSJose E. Roman Output Parameters: 823dd5b3ca6SJunchao Zhang + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 824dd5b3ca6SJunchao Zhang - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 82595fce210SBarry Smith 82695fce210SBarry Smith Level: developer 82795fce210SBarry Smith 82895fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 82995fce210SBarry Smith @*/ 83095fce210SBarry Smith PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 83195fce210SBarry Smith { 83295fce210SBarry Smith PetscFunctionBegin; 83395fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 83429046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 83595fce210SBarry Smith if (minleaf) *minleaf = sf->minleaf; 83695fce210SBarry Smith if (maxleaf) *maxleaf = sf->maxleaf; 83795fce210SBarry Smith PetscFunctionReturn(0); 83895fce210SBarry Smith } 83995fce210SBarry Smith 84095fce210SBarry Smith /*@C 841fe2efc57SMark PetscSFViewFromOptions - View from Options 842fe2efc57SMark 843fe2efc57SMark Collective on PetscSF 844fe2efc57SMark 845fe2efc57SMark Input Parameters: 846fe2efc57SMark + A - the star forest 847736c3998SJose E. Roman . obj - Optional object 848736c3998SJose E. Roman - name - command line option 849fe2efc57SMark 850fe2efc57SMark Level: intermediate 851fe2efc57SMark .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 852fe2efc57SMark @*/ 853fe2efc57SMark PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 854fe2efc57SMark { 855fe2efc57SMark PetscErrorCode ierr; 856fe2efc57SMark 857fe2efc57SMark PetscFunctionBegin; 858fe2efc57SMark PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 859fe2efc57SMark ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 860fe2efc57SMark PetscFunctionReturn(0); 861fe2efc57SMark } 862fe2efc57SMark 863fe2efc57SMark /*@C 86495fce210SBarry Smith PetscSFView - view a star forest 86595fce210SBarry Smith 86695fce210SBarry Smith Collective 86795fce210SBarry Smith 8684165533cSJose E. Roman Input Parameters: 86995fce210SBarry Smith + sf - star forest 87095fce210SBarry Smith - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 87195fce210SBarry Smith 87295fce210SBarry Smith Level: beginner 87395fce210SBarry Smith 87495fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetGraph() 87595fce210SBarry Smith @*/ 87695fce210SBarry Smith PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 87795fce210SBarry Smith { 87895fce210SBarry Smith PetscErrorCode ierr; 87995fce210SBarry Smith PetscBool iascii; 88095fce210SBarry Smith PetscViewerFormat format; 88195fce210SBarry Smith 88295fce210SBarry Smith PetscFunctionBegin; 88395fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 88495fce210SBarry Smith if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 88595fce210SBarry Smith PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 88695fce210SBarry Smith PetscCheckSameComm(sf,1,viewer,2); 88780153354SVaclav Hapla if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 88895fce210SBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 889*53dd6d7dSJunchao Zhang if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 89095fce210SBarry Smith PetscMPIInt rank; 89181bfa7aaSJed Brown PetscInt ii,i,j; 89295fce210SBarry Smith 893dae58748SBarry Smith ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 89495fce210SBarry Smith ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 895dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 89680153354SVaclav Hapla if (!sf->graphset) { 89780153354SVaclav Hapla ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 89880153354SVaclav Hapla ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 89980153354SVaclav Hapla PetscFunctionReturn(0); 90080153354SVaclav Hapla } 901ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 9021575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 90395fce210SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 90495fce210SBarry Smith for (i=0; i<sf->nleaves; i++) { 90595fce210SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 90695fce210SBarry Smith } 90795fce210SBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 90895fce210SBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 90995fce210SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 91081bfa7aaSJed Brown PetscMPIInt *tmpranks,*perm; 91181bfa7aaSJed Brown ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 912580bdb30SBarry Smith ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 91381bfa7aaSJed Brown for (i=0; i<sf->nranks; i++) perm[i] = i; 91481bfa7aaSJed Brown ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 91595fce210SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 91681bfa7aaSJed Brown for (ii=0; ii<sf->nranks; ii++) { 91781bfa7aaSJed Brown i = perm[ii]; 9187904a332SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 91995fce210SBarry Smith for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 92095fce210SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 92195fce210SBarry Smith } 92295fce210SBarry Smith } 92381bfa7aaSJed Brown ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 92495fce210SBarry Smith } 92595fce210SBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 9261575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 927dd5b3ca6SJunchao Zhang } 92895fce210SBarry Smith ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 92995fce210SBarry Smith } 93062152dedSBarry Smith if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 93195fce210SBarry Smith PetscFunctionReturn(0); 93295fce210SBarry Smith } 93395fce210SBarry Smith 93495fce210SBarry Smith /*@C 935dec1416fSJunchao Zhang PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 93695fce210SBarry Smith 93795fce210SBarry Smith Not Collective 93895fce210SBarry Smith 9394165533cSJose E. Roman Input Parameter: 94095fce210SBarry Smith . sf - star forest 94195fce210SBarry Smith 9424165533cSJose E. Roman Output Parameters: 94395fce210SBarry Smith + nranks - number of ranks referenced by local part 94495fce210SBarry Smith . ranks - array of ranks 94595fce210SBarry Smith . roffset - offset in rmine/rremote for each rank (length nranks+1) 94695fce210SBarry Smith . rmine - concatenated array holding local indices referencing each remote rank 94795fce210SBarry Smith - rremote - concatenated array holding remote indices referenced for each remote rank 94895fce210SBarry Smith 94995fce210SBarry Smith Level: developer 95095fce210SBarry Smith 951dec1416fSJunchao Zhang .seealso: PetscSFGetLeafRanks() 95295fce210SBarry Smith @*/ 953dec1416fSJunchao Zhang PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 95495fce210SBarry Smith { 955dec1416fSJunchao Zhang PetscErrorCode ierr; 95695fce210SBarry Smith 95795fce210SBarry Smith PetscFunctionBegin; 95895fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 95929046d53SLisandro Dalcin if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 960dec1416fSJunchao Zhang if (sf->ops->GetRootRanks) { 961dec1416fSJunchao Zhang ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 962dec1416fSJunchao Zhang } else { 963dec1416fSJunchao Zhang /* The generic implementation */ 96495fce210SBarry Smith if (nranks) *nranks = sf->nranks; 96595fce210SBarry Smith if (ranks) *ranks = sf->ranks; 96695fce210SBarry Smith if (roffset) *roffset = sf->roffset; 96795fce210SBarry Smith if (rmine) *rmine = sf->rmine; 96895fce210SBarry Smith if (rremote) *rremote = sf->rremote; 969dec1416fSJunchao Zhang } 97095fce210SBarry Smith PetscFunctionReturn(0); 97195fce210SBarry Smith } 97295fce210SBarry Smith 9738750ddebSJunchao Zhang /*@C 9748750ddebSJunchao Zhang PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 9758750ddebSJunchao Zhang 9768750ddebSJunchao Zhang Not Collective 9778750ddebSJunchao Zhang 9784165533cSJose E. Roman Input Parameter: 9798750ddebSJunchao Zhang . sf - star forest 9808750ddebSJunchao Zhang 9814165533cSJose E. Roman Output Parameters: 9828750ddebSJunchao Zhang + niranks - number of leaf ranks referencing roots on this process 9838750ddebSJunchao Zhang . iranks - array of ranks 9848750ddebSJunchao Zhang . ioffset - offset in irootloc for each rank (length niranks+1) 9858750ddebSJunchao Zhang - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 9868750ddebSJunchao Zhang 9878750ddebSJunchao Zhang Level: developer 9888750ddebSJunchao Zhang 989dec1416fSJunchao Zhang .seealso: PetscSFGetRootRanks() 9908750ddebSJunchao Zhang @*/ 9918750ddebSJunchao Zhang PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 9928750ddebSJunchao Zhang { 9938750ddebSJunchao Zhang PetscErrorCode ierr; 9948750ddebSJunchao Zhang 9958750ddebSJunchao Zhang PetscFunctionBegin; 9968750ddebSJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 9978750ddebSJunchao Zhang if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 9988750ddebSJunchao Zhang if (sf->ops->GetLeafRanks) { 9998750ddebSJunchao Zhang ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 10008750ddebSJunchao Zhang } else { 10018750ddebSJunchao Zhang PetscSFType type; 10028750ddebSJunchao Zhang ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 10038750ddebSJunchao Zhang SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 10048750ddebSJunchao Zhang } 10058750ddebSJunchao Zhang PetscFunctionReturn(0); 10068750ddebSJunchao Zhang } 10078750ddebSJunchao Zhang 1008b5a8e515SJed Brown static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 1009b5a8e515SJed Brown PetscInt i; 1010b5a8e515SJed Brown for (i=0; i<n; i++) { 1011b5a8e515SJed Brown if (needle == list[i]) return PETSC_TRUE; 1012b5a8e515SJed Brown } 1013b5a8e515SJed Brown return PETSC_FALSE; 1014b5a8e515SJed Brown } 1015b5a8e515SJed Brown 101695fce210SBarry Smith /*@C 101721c688dcSJed Brown PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 101821c688dcSJed Brown 101921c688dcSJed Brown Collective 102021c688dcSJed Brown 10214165533cSJose E. Roman Input Parameters: 1022b5a8e515SJed Brown + sf - PetscSF to set up; PetscSFSetGraph() must have been called 1023b5a8e515SJed Brown - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 102421c688dcSJed Brown 102521c688dcSJed Brown Level: developer 102621c688dcSJed Brown 1027dec1416fSJunchao Zhang .seealso: PetscSFGetRootRanks() 102821c688dcSJed Brown @*/ 1029b5a8e515SJed Brown PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 103021c688dcSJed Brown { 103121c688dcSJed Brown PetscErrorCode ierr; 103221c688dcSJed Brown PetscTable table; 103321c688dcSJed Brown PetscTablePosition pos; 1034b5a8e515SJed Brown PetscMPIInt size,groupsize,*groupranks; 1035247e8311SStefano Zampini PetscInt *rcount,*ranks; 1036247e8311SStefano Zampini PetscInt i, irank = -1,orank = -1; 103721c688dcSJed Brown 103821c688dcSJed Brown PetscFunctionBegin; 103921c688dcSJed Brown PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 104029046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 1041ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 104221c688dcSJed Brown ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 104321c688dcSJed Brown for (i=0; i<sf->nleaves; i++) { 104421c688dcSJed Brown /* Log 1-based rank */ 104521c688dcSJed Brown ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 104621c688dcSJed Brown } 104721c688dcSJed Brown ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 104821c688dcSJed Brown ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 104921c688dcSJed Brown ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 105021c688dcSJed Brown ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 105121c688dcSJed Brown for (i=0; i<sf->nranks; i++) { 105221c688dcSJed Brown ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 105321c688dcSJed Brown ranks[i]--; /* Convert back to 0-based */ 105421c688dcSJed Brown } 105521c688dcSJed Brown ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 1056b5a8e515SJed Brown 1057b5a8e515SJed Brown /* We expect that dgroup is reliably "small" while nranks could be large */ 1058b5a8e515SJed Brown { 10597fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 1060b5a8e515SJed Brown PetscMPIInt *dgroupranks; 1061ffc4695bSBarry Smith ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1062ffc4695bSBarry Smith ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr); 1063b5a8e515SJed Brown ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 1064b5a8e515SJed Brown ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 1065b5a8e515SJed Brown for (i=0; i<groupsize; i++) dgroupranks[i] = i; 1066ffc4695bSBarry Smith if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);} 1067ffc4695bSBarry Smith ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1068b5a8e515SJed Brown ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 1069b5a8e515SJed Brown } 1070b5a8e515SJed Brown 1071b5a8e515SJed Brown /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1072b5a8e515SJed Brown for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1073b5a8e515SJed Brown for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1074b5a8e515SJed Brown if (InList(ranks[i],groupsize,groupranks)) break; 1075b5a8e515SJed Brown } 1076b5a8e515SJed Brown for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1077b5a8e515SJed Brown if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1078b5a8e515SJed Brown } 1079b5a8e515SJed Brown if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1080b5a8e515SJed Brown PetscInt tmprank,tmpcount; 1081247e8311SStefano Zampini 1082b5a8e515SJed Brown tmprank = ranks[i]; 1083b5a8e515SJed Brown tmpcount = rcount[i]; 1084b5a8e515SJed Brown ranks[i] = ranks[sf->ndranks]; 1085b5a8e515SJed Brown rcount[i] = rcount[sf->ndranks]; 1086b5a8e515SJed Brown ranks[sf->ndranks] = tmprank; 1087b5a8e515SJed Brown rcount[sf->ndranks] = tmpcount; 1088b5a8e515SJed Brown sf->ndranks++; 1089b5a8e515SJed Brown } 1090b5a8e515SJed Brown } 1091b5a8e515SJed Brown ierr = PetscFree(groupranks);CHKERRQ(ierr); 1092b5a8e515SJed Brown ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 1093b5a8e515SJed Brown ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 109421c688dcSJed Brown sf->roffset[0] = 0; 109521c688dcSJed Brown for (i=0; i<sf->nranks; i++) { 109621c688dcSJed Brown ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 109721c688dcSJed Brown sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 109821c688dcSJed Brown rcount[i] = 0; 109921c688dcSJed Brown } 1100247e8311SStefano Zampini for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1101247e8311SStefano Zampini /* short circuit */ 1102247e8311SStefano Zampini if (orank != sf->remote[i].rank) { 110321c688dcSJed Brown /* Search for index of iremote[i].rank in sf->ranks */ 1104b5a8e515SJed Brown ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 1105b5a8e515SJed Brown if (irank < 0) { 1106b5a8e515SJed Brown ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 1107b5a8e515SJed Brown if (irank >= 0) irank += sf->ndranks; 110821c688dcSJed Brown } 1109247e8311SStefano Zampini orank = sf->remote[i].rank; 1110247e8311SStefano Zampini } 1111b5a8e515SJed Brown if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 111221c688dcSJed Brown sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 111321c688dcSJed Brown sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 111421c688dcSJed Brown rcount[irank]++; 111521c688dcSJed Brown } 111621c688dcSJed Brown ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 111721c688dcSJed Brown PetscFunctionReturn(0); 111821c688dcSJed Brown } 111921c688dcSJed Brown 112021c688dcSJed Brown /*@C 112195fce210SBarry Smith PetscSFGetGroups - gets incoming and outgoing process groups 112295fce210SBarry Smith 112395fce210SBarry Smith Collective 112495fce210SBarry Smith 11254165533cSJose E. Roman Input Parameter: 112695fce210SBarry Smith . sf - star forest 112795fce210SBarry Smith 11284165533cSJose E. Roman Output Parameters: 112995fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots) 113095fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference) 113195fce210SBarry Smith 113295fce210SBarry Smith Level: developer 113395fce210SBarry Smith 113495fce210SBarry Smith .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 113595fce210SBarry Smith @*/ 113695fce210SBarry Smith PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 113795fce210SBarry Smith { 113895fce210SBarry Smith PetscErrorCode ierr; 11397fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 114095fce210SBarry Smith 114195fce210SBarry Smith PetscFunctionBegin; 114244ee17edSStefano Zampini if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 114395fce210SBarry Smith if (sf->ingroup == MPI_GROUP_NULL) { 114495fce210SBarry Smith PetscInt i; 114595fce210SBarry Smith const PetscInt *indegree; 114695fce210SBarry Smith PetscMPIInt rank,*outranks,*inranks; 114795fce210SBarry Smith PetscSFNode *remote; 114895fce210SBarry Smith PetscSF bgcount; 114995fce210SBarry Smith 115095fce210SBarry Smith /* Compute the number of incoming ranks */ 1151785e854fSJed Brown ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 115295fce210SBarry Smith for (i=0; i<sf->nranks; i++) { 115395fce210SBarry Smith remote[i].rank = sf->ranks[i]; 115495fce210SBarry Smith remote[i].index = 0; 115595fce210SBarry Smith } 115695fce210SBarry Smith ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 115795fce210SBarry Smith ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 115895fce210SBarry Smith ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 115995fce210SBarry Smith ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 116095fce210SBarry Smith /* Enumerate the incoming ranks */ 1161dcca6d9dSJed Brown ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1162ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 116395fce210SBarry Smith for (i=0; i<sf->nranks; i++) outranks[i] = rank; 116495fce210SBarry Smith ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 116595fce210SBarry Smith ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1166ffc4695bSBarry Smith ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1167ffc4695bSBarry Smith ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr); 1168ffc4695bSBarry Smith ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 116995fce210SBarry Smith ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 117095fce210SBarry Smith ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 117195fce210SBarry Smith } 117295fce210SBarry Smith *incoming = sf->ingroup; 117395fce210SBarry Smith 117495fce210SBarry Smith if (sf->outgroup == MPI_GROUP_NULL) { 1175ffc4695bSBarry Smith ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1176ffc4695bSBarry Smith ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr); 1177ffc4695bSBarry Smith ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 117895fce210SBarry Smith } 117995fce210SBarry Smith *outgoing = sf->outgroup; 118095fce210SBarry Smith PetscFunctionReturn(0); 118195fce210SBarry Smith } 118295fce210SBarry Smith 118329046d53SLisandro Dalcin /*@ 11843b8d980fSPierre Jolivet PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 118595fce210SBarry Smith 118695fce210SBarry Smith Collective 118795fce210SBarry Smith 11884165533cSJose E. Roman Input Parameter: 118995fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex 119095fce210SBarry Smith 11914165533cSJose E. Roman Output Parameter: 119295fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1 119395fce210SBarry Smith 119495fce210SBarry Smith Level: developer 119595fce210SBarry Smith 119695fce210SBarry Smith Notes: 119795fce210SBarry Smith 119895fce210SBarry Smith In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 119995fce210SBarry Smith directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 120095fce210SBarry Smith edge, it is a candidate for future optimization that might involve its removal. 120195fce210SBarry Smith 1202673100f5SVaclav Hapla .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 120395fce210SBarry Smith @*/ 120495fce210SBarry Smith PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 120595fce210SBarry Smith { 120695fce210SBarry Smith PetscErrorCode ierr; 120795fce210SBarry Smith 120895fce210SBarry Smith PetscFunctionBegin; 120995fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 121095fce210SBarry Smith PetscValidPointer(multi,2); 121195fce210SBarry Smith if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 121295fce210SBarry Smith ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 121395fce210SBarry Smith *multi = sf->multi; 1214013b3241SStefano Zampini sf->multi->multi = sf->multi; 121595fce210SBarry Smith PetscFunctionReturn(0); 121695fce210SBarry Smith } 121795fce210SBarry Smith if (!sf->multi) { 121895fce210SBarry Smith const PetscInt *indegree; 12199837ea96SMatthew G. Knepley PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 122095fce210SBarry Smith PetscSFNode *remote; 122129046d53SLisandro Dalcin maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 122295fce210SBarry Smith ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 122395fce210SBarry Smith ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 12249837ea96SMatthew G. Knepley ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 122595fce210SBarry Smith inoffset[0] = 0; 122695fce210SBarry Smith for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 12279837ea96SMatthew G. Knepley for (i=0; i<maxlocal; i++) outones[i] = 1; 1228dbd2ff41SBarry Smith ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1229dbd2ff41SBarry Smith ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 123095fce210SBarry Smith for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 123176bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 123295fce210SBarry Smith for (i=0; i<sf->nroots; i++) { 123395fce210SBarry Smith if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 123495fce210SBarry Smith } 123576bd3646SJed Brown } 1236785e854fSJed Brown ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 123795fce210SBarry Smith for (i=0; i<sf->nleaves; i++) { 123895fce210SBarry Smith remote[i].rank = sf->remote[i].rank; 123938e7336fSToby Isaac remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 124095fce210SBarry Smith } 124195fce210SBarry Smith ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1242013b3241SStefano Zampini sf->multi->multi = sf->multi; 124301365b40SToby Isaac ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 124495fce210SBarry Smith if (sf->rankorder) { /* Sort the ranks */ 124595fce210SBarry Smith PetscMPIInt rank; 124695fce210SBarry Smith PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 124795fce210SBarry Smith PetscSFNode *newremote; 1248ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 124995fce210SBarry Smith for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 12509837ea96SMatthew G. Knepley ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 12519837ea96SMatthew G. Knepley for (i=0; i<maxlocal; i++) outranks[i] = rank; 125283df288dSJunchao Zhang ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 125383df288dSJunchao Zhang ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 125495fce210SBarry Smith /* Sort the incoming ranks at each vertex, build the inverse map */ 125595fce210SBarry Smith for (i=0; i<sf->nroots; i++) { 125695fce210SBarry Smith PetscInt j; 125795fce210SBarry Smith for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 125895fce210SBarry Smith ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 125995fce210SBarry Smith for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 126095fce210SBarry Smith } 1261ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1262ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1263785e854fSJed Brown ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 126495fce210SBarry Smith for (i=0; i<sf->nleaves; i++) { 126595fce210SBarry Smith newremote[i].rank = sf->remote[i].rank; 126601365b40SToby Isaac newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 126795fce210SBarry Smith } 126801365b40SToby Isaac ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 126995fce210SBarry Smith ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 127095fce210SBarry Smith } 127195fce210SBarry Smith ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 127295fce210SBarry Smith } 127395fce210SBarry Smith *multi = sf->multi; 127495fce210SBarry Smith PetscFunctionReturn(0); 127595fce210SBarry Smith } 127695fce210SBarry Smith 127795fce210SBarry Smith /*@C 127872502a1fSJunchao Zhang PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 127995fce210SBarry Smith 128095fce210SBarry Smith Collective 128195fce210SBarry Smith 12824165533cSJose E. Roman Input Parameters: 128395fce210SBarry Smith + sf - original star forest 1284ba2a7774SJunchao Zhang . nselected - number of selected roots on this process 1285ba2a7774SJunchao Zhang - selected - indices of the selected roots on this process 128695fce210SBarry Smith 12874165533cSJose E. Roman Output Parameter: 1288cd620004SJunchao Zhang . esf - new star forest 128995fce210SBarry Smith 129095fce210SBarry Smith Level: advanced 129195fce210SBarry Smith 129295fce210SBarry Smith Note: 129395fce210SBarry Smith To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 129495fce210SBarry Smith be done by calling PetscSFGetGraph(). 129595fce210SBarry Smith 129695fce210SBarry Smith .seealso: PetscSFSetGraph(), PetscSFGetGraph() 129795fce210SBarry Smith @*/ 129872502a1fSJunchao Zhang PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 129995fce210SBarry Smith { 1300cd620004SJunchao Zhang PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1301cd620004SJunchao Zhang const PetscInt *ilocal; 1302cd620004SJunchao Zhang signed char *rootdata,*leafdata,*leafmem; 1303ba2a7774SJunchao Zhang const PetscSFNode *iremote; 1304f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1305f659e5c7SJunchao Zhang MPI_Comm comm; 13060511a646SMatthew G. Knepley PetscErrorCode ierr; 130795fce210SBarry Smith 130895fce210SBarry Smith PetscFunctionBegin; 130995fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 131029046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 1311ba2a7774SJunchao Zhang if (nselected) PetscValidPointer(selected,3); 1312cd620004SJunchao Zhang PetscValidPointer(esf,4); 13130511a646SMatthew G. Knepley 1314f659e5c7SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1315140a1472SStefano Zampini ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1316f659e5c7SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1317cd620004SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1318cd620004SJunchao Zhang 131976bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1320cd620004SJunchao Zhang PetscBool dups; 1321cd620004SJunchao Zhang ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1322cd620004SJunchao Zhang if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1323cd620004SJunchao Zhang for (i=0; i<nselected; i++) 1324cd620004SJunchao Zhang if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1325cd620004SJunchao Zhang } 1326f659e5c7SJunchao Zhang 132772502a1fSJunchao Zhang if (sf->ops->CreateEmbeddedRootSF) { 132872502a1fSJunchao Zhang ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1329f659e5c7SJunchao Zhang } else { 1330cd620004SJunchao Zhang /* A generic version of creating embedded sf */ 1331cd620004SJunchao Zhang ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1332cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 1333cd620004SJunchao Zhang ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1334cd620004SJunchao Zhang leafdata = leafmem - minleaf; 1335cd620004SJunchao Zhang /* Tag selected roots and bcast to leaves */ 1336cd620004SJunchao Zhang for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1337ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1338ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1339ba2a7774SJunchao Zhang 1340cd620004SJunchao Zhang /* Build esf with leaves that are still connected */ 1341cd620004SJunchao Zhang esf_nleaves = 0; 1342cd620004SJunchao Zhang for (i=0; i<nleaves; i++) { 1343cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1344cd620004SJunchao Zhang /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1345cd620004SJunchao Zhang with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1346cd620004SJunchao Zhang */ 1347cd620004SJunchao Zhang esf_nleaves += (leafdata[j] ? 1 : 0); 1348cd620004SJunchao Zhang } 1349cd620004SJunchao Zhang ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1350cd620004SJunchao Zhang ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1351cd620004SJunchao Zhang for (i=n=0; i<nleaves; i++) { 1352cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1353cd620004SJunchao Zhang if (leafdata[j]) { 1354cd620004SJunchao Zhang new_ilocal[n] = j; 1355cd620004SJunchao Zhang new_iremote[n].rank = iremote[i].rank; 1356cd620004SJunchao Zhang new_iremote[n].index = iremote[i].index; 1357fc1ede2bSMatthew G. Knepley ++n; 135895fce210SBarry Smith } 135995fce210SBarry Smith } 1360cd620004SJunchao Zhang ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1361cd620004SJunchao Zhang ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1362cd620004SJunchao Zhang ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1363cd620004SJunchao Zhang ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1364f659e5c7SJunchao Zhang } 1365140a1472SStefano Zampini ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 136695fce210SBarry Smith PetscFunctionReturn(0); 136795fce210SBarry Smith } 136895fce210SBarry Smith 13692f5fb4c2SMatthew G. Knepley /*@C 13702f5fb4c2SMatthew G. Knepley PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 13712f5fb4c2SMatthew G. Knepley 13722f5fb4c2SMatthew G. Knepley Collective 13732f5fb4c2SMatthew G. Knepley 13744165533cSJose E. Roman Input Parameters: 13752f5fb4c2SMatthew G. Knepley + sf - original star forest 1376f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process 1377f659e5c7SJunchao Zhang - selected - indices of the selected leaves on this process 13782f5fb4c2SMatthew G. Knepley 13794165533cSJose E. Roman Output Parameter: 13802f5fb4c2SMatthew G. Knepley . newsf - new star forest 13812f5fb4c2SMatthew G. Knepley 13822f5fb4c2SMatthew G. Knepley Level: advanced 13832f5fb4c2SMatthew G. Knepley 138472502a1fSJunchao Zhang .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 13852f5fb4c2SMatthew G. Knepley @*/ 1386f659e5c7SJunchao Zhang PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 13872f5fb4c2SMatthew G. Knepley { 1388f659e5c7SJunchao Zhang const PetscSFNode *iremote; 1389f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1390f659e5c7SJunchao Zhang const PetscInt *ilocal; 1391f659e5c7SJunchao Zhang PetscInt i,nroots,*leaves,*new_ilocal; 1392f659e5c7SJunchao Zhang MPI_Comm comm; 13932f5fb4c2SMatthew G. Knepley PetscErrorCode ierr; 13942f5fb4c2SMatthew G. Knepley 13952f5fb4c2SMatthew G. Knepley PetscFunctionBegin; 13962f5fb4c2SMatthew G. Knepley PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 139729046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 1398f659e5c7SJunchao Zhang if (nselected) PetscValidPointer(selected,3); 13992f5fb4c2SMatthew G. Knepley PetscValidPointer(newsf,4); 14002f5fb4c2SMatthew G. Knepley 1401f659e5c7SJunchao Zhang /* Uniq selected[] and put results in leaves[] */ 1402f659e5c7SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1403f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1404dd5b3ca6SJunchao Zhang ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1405f659e5c7SJunchao Zhang ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1406f659e5c7SJunchao Zhang if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1407f659e5c7SJunchao Zhang 1408f659e5c7SJunchao Zhang /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1409f659e5c7SJunchao Zhang if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1410f659e5c7SJunchao Zhang ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1411f659e5c7SJunchao Zhang } else { 1412f659e5c7SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1413f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1414f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1415f659e5c7SJunchao Zhang for (i=0; i<nselected; ++i) { 1416f659e5c7SJunchao Zhang const PetscInt l = leaves[i]; 1417f659e5c7SJunchao Zhang new_ilocal[i] = ilocal ? ilocal[l] : l; 1418f659e5c7SJunchao Zhang new_iremote[i].rank = iremote[l].rank; 1419f659e5c7SJunchao Zhang new_iremote[i].index = iremote[l].index; 14202f5fb4c2SMatthew G. Knepley } 14214cc19a0cSStefano Zampini ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1422f659e5c7SJunchao Zhang ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1423f659e5c7SJunchao Zhang } 1424f659e5c7SJunchao Zhang ierr = PetscFree(leaves);CHKERRQ(ierr); 14252f5fb4c2SMatthew G. Knepley PetscFunctionReturn(0); 14262f5fb4c2SMatthew G. Knepley } 14272f5fb4c2SMatthew G. Knepley 142895fce210SBarry Smith /*@C 1429ad227feaSJunchao Zhang PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 14303482bfa8SJunchao Zhang 14313482bfa8SJunchao Zhang Collective on PetscSF 14323482bfa8SJunchao Zhang 14334165533cSJose E. Roman Input Parameters: 14343482bfa8SJunchao Zhang + sf - star forest on which to communicate 14353482bfa8SJunchao Zhang . unit - data type associated with each node 14363482bfa8SJunchao Zhang . rootdata - buffer to broadcast 14373482bfa8SJunchao Zhang - op - operation to use for reduction 14383482bfa8SJunchao Zhang 14394165533cSJose E. Roman Output Parameter: 14403482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 14413482bfa8SJunchao Zhang 14423482bfa8SJunchao Zhang Level: intermediate 14433482bfa8SJunchao Zhang 1444d0295fc0SJunchao Zhang Notes: 1445d0295fc0SJunchao Zhang When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1446d0295fc0SJunchao Zhang are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1447ad227feaSJunchao Zhang use PetscSFBcastWithMemTypeBegin() instead. 1448ad227feaSJunchao Zhang .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 14493482bfa8SJunchao Zhang @*/ 1450ad227feaSJunchao Zhang PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 14513482bfa8SJunchao Zhang { 14523482bfa8SJunchao Zhang PetscErrorCode ierr; 1453eb02082bSJunchao Zhang PetscMemType rootmtype,leafmtype; 14543482bfa8SJunchao Zhang 14553482bfa8SJunchao Zhang PetscFunctionBegin; 14563482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 14573482bfa8SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1458ad227feaSJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1459eb02082bSJunchao Zhang ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1460eb02082bSJunchao Zhang ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1461ad227feaSJunchao Zhang ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1462ad227feaSJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 14633482bfa8SJunchao Zhang PetscFunctionReturn(0); 14643482bfa8SJunchao Zhang } 14653482bfa8SJunchao Zhang 14663482bfa8SJunchao Zhang /*@C 1467ad227feaSJunchao Zhang PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1468d0295fc0SJunchao Zhang 1469d0295fc0SJunchao Zhang Collective on PetscSF 1470d0295fc0SJunchao Zhang 14714165533cSJose E. Roman Input Parameters: 1472d0295fc0SJunchao Zhang + sf - star forest on which to communicate 1473d0295fc0SJunchao Zhang . unit - data type associated with each node 1474d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1475d0295fc0SJunchao Zhang . rootdata - buffer to broadcast 1476d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1477d0295fc0SJunchao Zhang - op - operation to use for reduction 1478d0295fc0SJunchao Zhang 14794165533cSJose E. Roman Output Parameter: 1480d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 1481d0295fc0SJunchao Zhang 1482d0295fc0SJunchao Zhang Level: intermediate 1483d0295fc0SJunchao Zhang 1484ad227feaSJunchao Zhang .seealso: PetscSFBcastEnd(), PetscSFBcastBegin() 1485d0295fc0SJunchao Zhang @*/ 1486ad227feaSJunchao Zhang PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1487d0295fc0SJunchao Zhang { 1488d0295fc0SJunchao Zhang PetscErrorCode ierr; 1489d0295fc0SJunchao Zhang 1490d0295fc0SJunchao Zhang PetscFunctionBegin; 1491d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1492d0295fc0SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1493ad227feaSJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1494ad227feaSJunchao Zhang ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1495ad227feaSJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1496d0295fc0SJunchao Zhang PetscFunctionReturn(0); 1497d0295fc0SJunchao Zhang } 1498d0295fc0SJunchao Zhang 1499d0295fc0SJunchao Zhang /*@C 1500ad227feaSJunchao Zhang PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 15013482bfa8SJunchao Zhang 15023482bfa8SJunchao Zhang Collective 15033482bfa8SJunchao Zhang 15044165533cSJose E. Roman Input Parameters: 15053482bfa8SJunchao Zhang + sf - star forest 15063482bfa8SJunchao Zhang . unit - data type 15073482bfa8SJunchao Zhang . rootdata - buffer to broadcast 15083482bfa8SJunchao Zhang - op - operation to use for reduction 15093482bfa8SJunchao Zhang 15104165533cSJose E. Roman Output Parameter: 15113482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 15123482bfa8SJunchao Zhang 15133482bfa8SJunchao Zhang Level: intermediate 15143482bfa8SJunchao Zhang 15153482bfa8SJunchao Zhang .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 15163482bfa8SJunchao Zhang @*/ 1517ad227feaSJunchao Zhang PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 15183482bfa8SJunchao Zhang { 15193482bfa8SJunchao Zhang PetscErrorCode ierr; 15203482bfa8SJunchao Zhang 15213482bfa8SJunchao Zhang PetscFunctionBegin; 15223482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1523ad227feaSJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1524ad227feaSJunchao Zhang ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1525ad227feaSJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 15263482bfa8SJunchao Zhang PetscFunctionReturn(0); 15273482bfa8SJunchao Zhang } 15283482bfa8SJunchao Zhang 15293482bfa8SJunchao Zhang /*@C 153095fce210SBarry Smith PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 153195fce210SBarry Smith 153295fce210SBarry Smith Collective 153395fce210SBarry Smith 15344165533cSJose E. Roman Input Parameters: 153595fce210SBarry Smith + sf - star forest 153695fce210SBarry Smith . unit - data type 153795fce210SBarry Smith . leafdata - values to reduce 153895fce210SBarry Smith - op - reduction operation 153995fce210SBarry Smith 15404165533cSJose E. Roman Output Parameter: 154195fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 154295fce210SBarry Smith 154395fce210SBarry Smith Level: intermediate 154495fce210SBarry Smith 1545d0295fc0SJunchao Zhang Notes: 1546d0295fc0SJunchao Zhang When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1547d0295fc0SJunchao Zhang are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1548d0295fc0SJunchao Zhang use PetscSFReduceWithMemTypeBegin() instead. 1549d0295fc0SJunchao Zhang 1550d0295fc0SJunchao Zhang .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 155195fce210SBarry Smith @*/ 155295fce210SBarry Smith PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 155395fce210SBarry Smith { 155495fce210SBarry Smith PetscErrorCode ierr; 1555eb02082bSJunchao Zhang PetscMemType rootmtype,leafmtype; 155695fce210SBarry Smith 155795fce210SBarry Smith PetscFunctionBegin; 155895fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 155995fce210SBarry Smith ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 156097929ea7SJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1561eb02082bSJunchao Zhang ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1562eb02082bSJunchao Zhang ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1563eb02082bSJunchao Zhang ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 156497929ea7SJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 156595fce210SBarry Smith PetscFunctionReturn(0); 156695fce210SBarry Smith } 156795fce210SBarry Smith 156895fce210SBarry Smith /*@C 1569d0295fc0SJunchao Zhang PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1570d0295fc0SJunchao Zhang 1571d0295fc0SJunchao Zhang Collective 1572d0295fc0SJunchao Zhang 15734165533cSJose E. Roman Input Parameters: 1574d0295fc0SJunchao Zhang + sf - star forest 1575d0295fc0SJunchao Zhang . unit - data type 1576d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1577d0295fc0SJunchao Zhang . leafdata - values to reduce 1578d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1579d0295fc0SJunchao Zhang - op - reduction operation 1580d0295fc0SJunchao Zhang 15814165533cSJose E. Roman Output Parameter: 1582d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root 1583d0295fc0SJunchao Zhang 1584d0295fc0SJunchao Zhang Level: intermediate 1585d0295fc0SJunchao Zhang 1586d0295fc0SJunchao Zhang .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1587d0295fc0SJunchao Zhang @*/ 1588d0295fc0SJunchao Zhang PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1589d0295fc0SJunchao Zhang { 1590d0295fc0SJunchao Zhang PetscErrorCode ierr; 1591d0295fc0SJunchao Zhang 1592d0295fc0SJunchao Zhang PetscFunctionBegin; 1593d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1594d0295fc0SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 159597929ea7SJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1596d0295fc0SJunchao Zhang ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 159797929ea7SJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1598d0295fc0SJunchao Zhang PetscFunctionReturn(0); 1599d0295fc0SJunchao Zhang } 1600d0295fc0SJunchao Zhang 1601d0295fc0SJunchao Zhang /*@C 160295fce210SBarry Smith PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 160395fce210SBarry Smith 160495fce210SBarry Smith Collective 160595fce210SBarry Smith 16064165533cSJose E. Roman Input Parameters: 160795fce210SBarry Smith + sf - star forest 160895fce210SBarry Smith . unit - data type 160995fce210SBarry Smith . leafdata - values to reduce 161095fce210SBarry Smith - op - reduction operation 161195fce210SBarry Smith 16124165533cSJose E. Roman Output Parameter: 161395fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 161495fce210SBarry Smith 161595fce210SBarry Smith Level: intermediate 161695fce210SBarry Smith 161795fce210SBarry Smith .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 161895fce210SBarry Smith @*/ 161995fce210SBarry Smith PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 162095fce210SBarry Smith { 162195fce210SBarry Smith PetscErrorCode ierr; 162295fce210SBarry Smith 162395fce210SBarry Smith PetscFunctionBegin; 162495fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 162597929ea7SJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);} 162600816365SJunchao Zhang ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 162797929ea7SJunchao Zhang if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);} 162895fce210SBarry Smith PetscFunctionReturn(0); 162995fce210SBarry Smith } 163095fce210SBarry Smith 163195fce210SBarry Smith /*@C 1632a1729e3fSJunchao Zhang PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1633a1729e3fSJunchao Zhang 1634a1729e3fSJunchao Zhang Collective 1635a1729e3fSJunchao Zhang 16364165533cSJose E. Roman Input Parameters: 1637a1729e3fSJunchao Zhang + sf - star forest 1638a1729e3fSJunchao Zhang . unit - data type 1639a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1640a1729e3fSJunchao Zhang - op - operation to use for reduction 1641a1729e3fSJunchao Zhang 16424165533cSJose E. Roman Output Parameters: 1643a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1644a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1645a1729e3fSJunchao Zhang 1646a1729e3fSJunchao Zhang Level: advanced 1647a1729e3fSJunchao Zhang 1648a1729e3fSJunchao Zhang Note: 1649a1729e3fSJunchao Zhang The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1650a1729e3fSJunchao Zhang might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1651a1729e3fSJunchao Zhang not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1652a1729e3fSJunchao Zhang integers. 1653a1729e3fSJunchao Zhang 1654a1729e3fSJunchao Zhang .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1655a1729e3fSJunchao Zhang @*/ 1656a1729e3fSJunchao Zhang PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1657a1729e3fSJunchao Zhang { 1658a1729e3fSJunchao Zhang PetscErrorCode ierr; 1659eb02082bSJunchao Zhang PetscMemType rootmtype,leafmtype,leafupdatemtype; 1660a1729e3fSJunchao Zhang 1661a1729e3fSJunchao Zhang PetscFunctionBegin; 1662a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1663a1729e3fSJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1664a1729e3fSJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1665eb02082bSJunchao Zhang ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1666eb02082bSJunchao Zhang ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1667eb02082bSJunchao Zhang ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1668eb02082bSJunchao Zhang if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1669eb02082bSJunchao Zhang ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1670a1729e3fSJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1671a1729e3fSJunchao Zhang PetscFunctionReturn(0); 1672a1729e3fSJunchao Zhang } 1673a1729e3fSJunchao Zhang 1674a1729e3fSJunchao Zhang /*@C 1675a1729e3fSJunchao Zhang PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1676a1729e3fSJunchao Zhang 1677a1729e3fSJunchao Zhang Collective 1678a1729e3fSJunchao Zhang 16794165533cSJose E. Roman Input Parameters: 1680a1729e3fSJunchao Zhang + sf - star forest 1681a1729e3fSJunchao Zhang . unit - data type 1682a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1683a1729e3fSJunchao Zhang - op - operation to use for reduction 1684a1729e3fSJunchao Zhang 16854165533cSJose E. Roman Output Parameters: 1686a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1687a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1688a1729e3fSJunchao Zhang 1689a1729e3fSJunchao Zhang Level: advanced 1690a1729e3fSJunchao Zhang 1691a1729e3fSJunchao Zhang .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1692a1729e3fSJunchao Zhang @*/ 1693a1729e3fSJunchao Zhang PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1694a1729e3fSJunchao Zhang { 1695a1729e3fSJunchao Zhang PetscErrorCode ierr; 1696a1729e3fSJunchao Zhang 1697a1729e3fSJunchao Zhang PetscFunctionBegin; 1698a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1699a1729e3fSJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 170000816365SJunchao Zhang ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1701a1729e3fSJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1702a1729e3fSJunchao Zhang PetscFunctionReturn(0); 1703a1729e3fSJunchao Zhang } 1704a1729e3fSJunchao Zhang 1705a1729e3fSJunchao Zhang /*@C 170695fce210SBarry Smith PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 170795fce210SBarry Smith 170895fce210SBarry Smith Collective 170995fce210SBarry Smith 17104165533cSJose E. Roman Input Parameter: 171195fce210SBarry Smith . sf - star forest 171295fce210SBarry Smith 17134165533cSJose E. Roman Output Parameter: 171495fce210SBarry Smith . degree - degree of each root vertex 171595fce210SBarry Smith 171695fce210SBarry Smith Level: advanced 171795fce210SBarry Smith 1718ffe67aa5SVáclav Hapla Notes: 1719ffe67aa5SVáclav Hapla The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1720ffe67aa5SVáclav Hapla 172195fce210SBarry Smith .seealso: PetscSFGatherBegin() 172295fce210SBarry Smith @*/ 172395fce210SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 172495fce210SBarry Smith { 172595fce210SBarry Smith PetscErrorCode ierr; 172695fce210SBarry Smith 172795fce210SBarry Smith PetscFunctionBegin; 172895fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 172995fce210SBarry Smith PetscSFCheckGraphSet(sf,1); 173095fce210SBarry Smith PetscValidPointer(degree,2); 1731803bd9e8SMatthew G. Knepley if (!sf->degreeknown) { 17325b0d146aSStefano Zampini PetscInt i, nroots = sf->nroots, maxlocal; 1733803bd9e8SMatthew G. Knepley if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 17345b0d146aSStefano Zampini maxlocal = sf->maxleaf-sf->minleaf+1; 173529046d53SLisandro Dalcin ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 173629046d53SLisandro Dalcin ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 173729046d53SLisandro Dalcin for (i=0; i<nroots; i++) sf->degree[i] = 0; 17389837ea96SMatthew G. Knepley for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 17395b0d146aSStefano Zampini ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 174095fce210SBarry Smith } 174195fce210SBarry Smith *degree = NULL; 174295fce210SBarry Smith PetscFunctionReturn(0); 174395fce210SBarry Smith } 174495fce210SBarry Smith 174595fce210SBarry Smith /*@C 174695fce210SBarry Smith PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 174795fce210SBarry Smith 174895fce210SBarry Smith Collective 174995fce210SBarry Smith 17504165533cSJose E. Roman Input Parameter: 175195fce210SBarry Smith . sf - star forest 175295fce210SBarry Smith 17534165533cSJose E. Roman Output Parameter: 175495fce210SBarry Smith . degree - degree of each root vertex 175595fce210SBarry Smith 175695fce210SBarry Smith Level: developer 175795fce210SBarry Smith 1758ffe67aa5SVáclav Hapla Notes: 1759ffe67aa5SVáclav Hapla The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1760ffe67aa5SVáclav Hapla 176195fce210SBarry Smith .seealso: 176295fce210SBarry Smith @*/ 176395fce210SBarry Smith PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 176495fce210SBarry Smith { 176595fce210SBarry Smith PetscErrorCode ierr; 176695fce210SBarry Smith 176795fce210SBarry Smith PetscFunctionBegin; 176895fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 176995fce210SBarry Smith PetscSFCheckGraphSet(sf,1); 177029046d53SLisandro Dalcin PetscValidPointer(degree,2); 177195fce210SBarry Smith if (!sf->degreeknown) { 177229046d53SLisandro Dalcin if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 17735b0d146aSStefano Zampini ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 177495fce210SBarry Smith ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 177595fce210SBarry Smith sf->degreeknown = PETSC_TRUE; 177695fce210SBarry Smith } 177795fce210SBarry Smith *degree = sf->degree; 177895fce210SBarry Smith PetscFunctionReturn(0); 177995fce210SBarry Smith } 178095fce210SBarry Smith 1781673100f5SVaclav Hapla /*@C 178266dfcd1aSVaclav Hapla PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 178366dfcd1aSVaclav Hapla Each multi-root is assigned index of the corresponding original root. 1784673100f5SVaclav Hapla 1785673100f5SVaclav Hapla Collective 1786673100f5SVaclav Hapla 17874165533cSJose E. Roman Input Parameters: 1788673100f5SVaclav Hapla + sf - star forest 1789673100f5SVaclav Hapla - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1790673100f5SVaclav Hapla 17914165533cSJose E. Roman Output Parameters: 179266dfcd1aSVaclav Hapla + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 179366dfcd1aSVaclav Hapla - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1794673100f5SVaclav Hapla 1795673100f5SVaclav Hapla Level: developer 1796673100f5SVaclav Hapla 1797ffe67aa5SVáclav Hapla Notes: 1798ffe67aa5SVáclav Hapla The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1799ffe67aa5SVáclav Hapla 1800673100f5SVaclav Hapla .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1801673100f5SVaclav Hapla @*/ 180266dfcd1aSVaclav Hapla PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1803673100f5SVaclav Hapla { 1804673100f5SVaclav Hapla PetscSF msf; 1805673100f5SVaclav Hapla PetscInt i, j, k, nroots, nmroots; 1806673100f5SVaclav Hapla PetscErrorCode ierr; 1807673100f5SVaclav Hapla 1808673100f5SVaclav Hapla PetscFunctionBegin; 1809673100f5SVaclav Hapla PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1810673100f5SVaclav Hapla ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 181129328920SVaclav Hapla if (nroots) PetscValidIntPointer(degree,2); 181266dfcd1aSVaclav Hapla if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 181366dfcd1aSVaclav Hapla PetscValidPointer(multiRootsOrigNumbering,4); 1814673100f5SVaclav Hapla ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1815673100f5SVaclav Hapla ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 181666dfcd1aSVaclav Hapla ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1817673100f5SVaclav Hapla for (i=0,j=0,k=0; i<nroots; i++) { 1818673100f5SVaclav Hapla if (!degree[i]) continue; 1819673100f5SVaclav Hapla for (j=0; j<degree[i]; j++,k++) { 182066dfcd1aSVaclav Hapla (*multiRootsOrigNumbering)[k] = i; 1821673100f5SVaclav Hapla } 1822673100f5SVaclav Hapla } 1823673100f5SVaclav Hapla if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 182466dfcd1aSVaclav Hapla if (nMultiRoots) *nMultiRoots = nmroots; 1825673100f5SVaclav Hapla PetscFunctionReturn(0); 1826673100f5SVaclav Hapla } 1827673100f5SVaclav Hapla 182895fce210SBarry Smith /*@C 182995fce210SBarry Smith PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 183095fce210SBarry Smith 183195fce210SBarry Smith Collective 183295fce210SBarry Smith 18334165533cSJose E. Roman Input Parameters: 183495fce210SBarry Smith + sf - star forest 183595fce210SBarry Smith . unit - data type 183695fce210SBarry Smith - leafdata - leaf data to gather to roots 183795fce210SBarry Smith 18384165533cSJose E. Roman Output Parameter: 183995fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 184095fce210SBarry Smith 184195fce210SBarry Smith Level: intermediate 184295fce210SBarry Smith 184395fce210SBarry Smith .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 184495fce210SBarry Smith @*/ 184595fce210SBarry Smith PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 184695fce210SBarry Smith { 184795fce210SBarry Smith PetscErrorCode ierr; 1848a5526d50SJunchao Zhang PetscSF multi = NULL; 184995fce210SBarry Smith 185095fce210SBarry Smith PetscFunctionBegin; 185195fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 185229046d53SLisandro Dalcin ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 185395fce210SBarry Smith ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 185483df288dSJunchao Zhang ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 185595fce210SBarry Smith PetscFunctionReturn(0); 185695fce210SBarry Smith } 185795fce210SBarry Smith 185895fce210SBarry Smith /*@C 185995fce210SBarry Smith PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 186095fce210SBarry Smith 186195fce210SBarry Smith Collective 186295fce210SBarry Smith 18634165533cSJose E. Roman Input Parameters: 186495fce210SBarry Smith + sf - star forest 186595fce210SBarry Smith . unit - data type 186695fce210SBarry Smith - leafdata - leaf data to gather to roots 186795fce210SBarry Smith 18684165533cSJose E. Roman Output Parameter: 186995fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 187095fce210SBarry Smith 187195fce210SBarry Smith Level: intermediate 187295fce210SBarry Smith 187395fce210SBarry Smith .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 187495fce210SBarry Smith @*/ 187595fce210SBarry Smith PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 187695fce210SBarry Smith { 187795fce210SBarry Smith PetscErrorCode ierr; 1878a5526d50SJunchao Zhang PetscSF multi = NULL; 187995fce210SBarry Smith 188095fce210SBarry Smith PetscFunctionBegin; 188195fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 188295fce210SBarry Smith ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 188383df288dSJunchao Zhang ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 188495fce210SBarry Smith PetscFunctionReturn(0); 188595fce210SBarry Smith } 188695fce210SBarry Smith 188795fce210SBarry Smith /*@C 188895fce210SBarry Smith PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 188995fce210SBarry Smith 189095fce210SBarry Smith Collective 189195fce210SBarry Smith 18924165533cSJose E. Roman Input Parameters: 189395fce210SBarry Smith + sf - star forest 189495fce210SBarry Smith . unit - data type 189595fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 189695fce210SBarry Smith 18974165533cSJose E. Roman Output Parameter: 189895fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 189995fce210SBarry Smith 190095fce210SBarry Smith Level: intermediate 190195fce210SBarry Smith 190295fce210SBarry Smith .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 190395fce210SBarry Smith @*/ 190495fce210SBarry Smith PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 190595fce210SBarry Smith { 190695fce210SBarry Smith PetscErrorCode ierr; 1907a5526d50SJunchao Zhang PetscSF multi = NULL; 190895fce210SBarry Smith 190995fce210SBarry Smith PetscFunctionBegin; 191095fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 191195fce210SBarry Smith ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 191295fce210SBarry Smith ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1913ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 191495fce210SBarry Smith PetscFunctionReturn(0); 191595fce210SBarry Smith } 191695fce210SBarry Smith 191795fce210SBarry Smith /*@C 191895fce210SBarry Smith PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 191995fce210SBarry Smith 192095fce210SBarry Smith Collective 192195fce210SBarry Smith 19224165533cSJose E. Roman Input Parameters: 192395fce210SBarry Smith + sf - star forest 192495fce210SBarry Smith . unit - data type 192595fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 192695fce210SBarry Smith 19274165533cSJose E. Roman Output Parameter: 192895fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 192995fce210SBarry Smith 193095fce210SBarry Smith Level: intermediate 193195fce210SBarry Smith 193295fce210SBarry Smith .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 193395fce210SBarry Smith @*/ 193495fce210SBarry Smith PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 193595fce210SBarry Smith { 193695fce210SBarry Smith PetscErrorCode ierr; 1937a5526d50SJunchao Zhang PetscSF multi = NULL; 193895fce210SBarry Smith 193995fce210SBarry Smith PetscFunctionBegin; 194095fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 194195fce210SBarry Smith ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1942ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 194395fce210SBarry Smith PetscFunctionReturn(0); 194495fce210SBarry Smith } 1945a7b3aa13SAta Mesgarnejad 1946a072220fSLawrence Mitchell static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1947a072220fSLawrence Mitchell { 1948a072220fSLawrence Mitchell PetscInt i, n, nleaves; 1949a072220fSLawrence Mitchell const PetscInt *ilocal = NULL; 1950a072220fSLawrence Mitchell PetscHSetI seen; 1951a072220fSLawrence Mitchell PetscErrorCode ierr; 1952a072220fSLawrence Mitchell 1953a072220fSLawrence Mitchell PetscFunctionBegin; 1954b458e8f1SJose E. Roman if (PetscDefined(USE_DEBUG)) { 1955a072220fSLawrence Mitchell ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1956a072220fSLawrence Mitchell ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1957a072220fSLawrence Mitchell for (i = 0; i < nleaves; i++) { 1958a072220fSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 1959a072220fSLawrence Mitchell ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1960a072220fSLawrence Mitchell } 1961a072220fSLawrence Mitchell ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1962a072220fSLawrence Mitchell if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1963a072220fSLawrence Mitchell ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1964b458e8f1SJose E. Roman } 1965a072220fSLawrence Mitchell PetscFunctionReturn(0); 1966a072220fSLawrence Mitchell } 196754729392SStefano Zampini 1968a7b3aa13SAta Mesgarnejad /*@ 196904c0ada0SJunchao Zhang PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1970a7b3aa13SAta Mesgarnejad 1971a7b3aa13SAta Mesgarnejad Input Parameters: 197254729392SStefano Zampini + sfA - The first PetscSF 197354729392SStefano Zampini - sfB - The second PetscSF 1974a7b3aa13SAta Mesgarnejad 1975a7b3aa13SAta Mesgarnejad Output Parameters: 197654729392SStefano Zampini . sfBA - The composite SF 1977a7b3aa13SAta Mesgarnejad 1978a7b3aa13SAta Mesgarnejad Level: developer 1979a7b3aa13SAta Mesgarnejad 1980a072220fSLawrence Mitchell Notes: 198154729392SStefano Zampini Currently, the two SFs must be defined on congruent communicators and they must be true star 198254729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. 198354729392SStefano Zampini 198454729392SStefano Zampini sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 198554729392SStefano Zampini a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 198654729392SStefano Zampini nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 198754729392SStefano Zampini Bcast on sfA, then a Bcast on sfB, on connected nodes. 1988a072220fSLawrence Mitchell 198904c0ada0SJunchao Zhang .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1990a7b3aa13SAta Mesgarnejad @*/ 1991a7b3aa13SAta Mesgarnejad PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1992a7b3aa13SAta Mesgarnejad { 199304c0ada0SJunchao Zhang PetscErrorCode ierr; 1994a7b3aa13SAta Mesgarnejad const PetscSFNode *remotePointsA,*remotePointsB; 1995d41018fbSJunchao Zhang PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 199654729392SStefano Zampini const PetscInt *localPointsA,*localPointsB; 199754729392SStefano Zampini PetscInt *localPointsBA; 199854729392SStefano Zampini PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 199954729392SStefano Zampini PetscBool denseB; 2000a7b3aa13SAta Mesgarnejad 2001a7b3aa13SAta Mesgarnejad PetscFunctionBegin; 2002a7b3aa13SAta Mesgarnejad PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 200329046d53SLisandro Dalcin PetscSFCheckGraphSet(sfA,1); 200429046d53SLisandro Dalcin PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 200529046d53SLisandro Dalcin PetscSFCheckGraphSet(sfB,2); 200654729392SStefano Zampini PetscCheckSameComm(sfA,1,sfB,2); 200729046d53SLisandro Dalcin PetscValidPointer(sfBA,3); 200854729392SStefano Zampini ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 200954729392SStefano Zampini ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 201054729392SStefano Zampini 2011a7b3aa13SAta Mesgarnejad ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 2012a7b3aa13SAta Mesgarnejad ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 2013d41018fbSJunchao Zhang if (localPointsA) { 201454729392SStefano Zampini ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 201554729392SStefano Zampini for (i=0; i<numRootsB; i++) { 201654729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 201754729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 201854729392SStefano Zampini } 201954729392SStefano Zampini for (i=0; i<numLeavesA; i++) { 202054729392SStefano Zampini if (localPointsA[i] >= numRootsB) continue; 202154729392SStefano Zampini reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 202254729392SStefano Zampini } 2023d41018fbSJunchao Zhang remotePointsA = reorderedRemotePointsA; 2024d41018fbSJunchao Zhang } 2025d41018fbSJunchao Zhang ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 2026d41018fbSJunchao Zhang ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 2027ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2028ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2029d41018fbSJunchao Zhang ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2030d41018fbSJunchao Zhang 203154729392SStefano Zampini denseB = (PetscBool)!localPointsB; 203254729392SStefano Zampini for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 203354729392SStefano Zampini if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 203454729392SStefano Zampini else numLeavesBA++; 203554729392SStefano Zampini } 203654729392SStefano Zampini if (denseB) { 2037d41018fbSJunchao Zhang localPointsBA = NULL; 2038d41018fbSJunchao Zhang remotePointsBA = leafdataB; 2039d41018fbSJunchao Zhang } else { 204054729392SStefano Zampini ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 204154729392SStefano Zampini ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 204254729392SStefano Zampini for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 204354729392SStefano Zampini const PetscInt l = localPointsB ? localPointsB[i] : i; 204454729392SStefano Zampini 204554729392SStefano Zampini if (leafdataB[l-minleaf].rank == -1) continue; 204654729392SStefano Zampini remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 204754729392SStefano Zampini localPointsBA[numLeavesBA] = l; 204854729392SStefano Zampini numLeavesBA++; 204954729392SStefano Zampini } 2050d41018fbSJunchao Zhang ierr = PetscFree(leafdataB);CHKERRQ(ierr); 2051d41018fbSJunchao Zhang } 205254729392SStefano Zampini ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 205320c24465SJunchao Zhang ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 205454729392SStefano Zampini ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2055a7b3aa13SAta Mesgarnejad PetscFunctionReturn(0); 2056a7b3aa13SAta Mesgarnejad } 20571c6ba672SJunchao Zhang 205804c0ada0SJunchao Zhang /*@ 205954729392SStefano Zampini PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 206004c0ada0SJunchao Zhang 206104c0ada0SJunchao Zhang Input Parameters: 206254729392SStefano Zampini + sfA - The first PetscSF 206354729392SStefano Zampini - sfB - The second PetscSF 206404c0ada0SJunchao Zhang 206504c0ada0SJunchao Zhang Output Parameters: 206654729392SStefano Zampini . sfBA - The composite SF. 206704c0ada0SJunchao Zhang 206804c0ada0SJunchao Zhang Level: developer 206904c0ada0SJunchao Zhang 207054729392SStefano Zampini Notes: 207154729392SStefano Zampini Currently, the two SFs must be defined on congruent communicators and they must be true star 207254729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 207354729392SStefano Zampini second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 207454729392SStefano Zampini 207554729392SStefano Zampini sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 207654729392SStefano Zampini a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 207754729392SStefano Zampini roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 207854729392SStefano Zampini a Reduce on sfB, on connected roots. 207954729392SStefano Zampini 208054729392SStefano Zampini .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 208104c0ada0SJunchao Zhang @*/ 208204c0ada0SJunchao Zhang PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 208304c0ada0SJunchao Zhang { 208404c0ada0SJunchao Zhang PetscErrorCode ierr; 208504c0ada0SJunchao Zhang const PetscSFNode *remotePointsA,*remotePointsB; 208604c0ada0SJunchao Zhang PetscSFNode *remotePointsBA; 208704c0ada0SJunchao Zhang const PetscInt *localPointsA,*localPointsB; 208854729392SStefano Zampini PetscSFNode *reorderedRemotePointsA = NULL; 208954729392SStefano Zampini PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 20905b0d146aSStefano Zampini MPI_Op op; 20915b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 20925b0d146aSStefano Zampini PetscBool iswin; 20935b0d146aSStefano Zampini #endif 209404c0ada0SJunchao Zhang 209504c0ada0SJunchao Zhang PetscFunctionBegin; 209604c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 209704c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfA,1); 209804c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 209904c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfB,2); 210054729392SStefano Zampini PetscCheckSameComm(sfA,1,sfB,2); 210104c0ada0SJunchao Zhang PetscValidPointer(sfBA,3); 210254729392SStefano Zampini ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 210354729392SStefano Zampini ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 210454729392SStefano Zampini 210504c0ada0SJunchao Zhang ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 210604c0ada0SJunchao Zhang ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 21075b0d146aSStefano Zampini 21085b0d146aSStefano Zampini /* TODO: Check roots of sfB have degree of 1 */ 21095b0d146aSStefano Zampini /* Once we implement it, we can replace the MPI_MAXLOC 211083df288dSJunchao Zhang with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 21115b0d146aSStefano Zampini We use MPI_MAXLOC only to have a deterministic output from this routine if 21125b0d146aSStefano Zampini the root condition is not meet. 21135b0d146aSStefano Zampini */ 21145b0d146aSStefano Zampini op = MPI_MAXLOC; 21155b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 21165b0d146aSStefano Zampini /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 21175b0d146aSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 211883df288dSJunchao Zhang if (iswin) op = MPI_REPLACE; 21195b0d146aSStefano Zampini #endif 21205b0d146aSStefano Zampini 212154729392SStefano Zampini ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 212254729392SStefano Zampini ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 212354729392SStefano Zampini for (i=0; i<maxleaf - minleaf + 1; i++) { 212454729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 212554729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 212654729392SStefano Zampini } 212754729392SStefano Zampini if (localPointsA) { 212854729392SStefano Zampini for (i=0; i<numLeavesA; i++) { 212954729392SStefano Zampini if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 213054729392SStefano Zampini reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 213154729392SStefano Zampini } 213254729392SStefano Zampini } else { 213354729392SStefano Zampini for (i=0; i<numLeavesA; i++) { 213454729392SStefano Zampini if (i > maxleaf || i < minleaf) continue; 213554729392SStefano Zampini reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 213654729392SStefano Zampini } 213754729392SStefano Zampini } 213854729392SStefano Zampini 213954729392SStefano Zampini ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 214004c0ada0SJunchao Zhang ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 214154729392SStefano Zampini for (i=0; i<numRootsB; i++) { 214254729392SStefano Zampini remotePointsBA[i].rank = -1; 214354729392SStefano Zampini remotePointsBA[i].index = -1; 214454729392SStefano Zampini } 214554729392SStefano Zampini 21465b0d146aSStefano Zampini ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 21475b0d146aSStefano Zampini ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 214854729392SStefano Zampini ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 214954729392SStefano Zampini for (i=0,numLeavesBA=0; i<numRootsB; i++) { 215054729392SStefano Zampini if (remotePointsBA[i].rank == -1) continue; 215154729392SStefano Zampini remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 215254729392SStefano Zampini remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 215354729392SStefano Zampini localPointsBA[numLeavesBA] = i; 215454729392SStefano Zampini numLeavesBA++; 215554729392SStefano Zampini } 215654729392SStefano Zampini ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 215720c24465SJunchao Zhang ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 215854729392SStefano Zampini ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 215904c0ada0SJunchao Zhang PetscFunctionReturn(0); 216004c0ada0SJunchao Zhang } 216104c0ada0SJunchao Zhang 21621c6ba672SJunchao Zhang /* 21631c6ba672SJunchao Zhang PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 21641c6ba672SJunchao Zhang 21651c6ba672SJunchao Zhang Input Parameters: 21661c6ba672SJunchao Zhang . sf - The global PetscSF 21671c6ba672SJunchao Zhang 21681c6ba672SJunchao Zhang Output Parameters: 21691c6ba672SJunchao Zhang . out - The local PetscSF 21701c6ba672SJunchao Zhang */ 21711c6ba672SJunchao Zhang PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 21721c6ba672SJunchao Zhang { 21731c6ba672SJunchao Zhang MPI_Comm comm; 21741c6ba672SJunchao Zhang PetscMPIInt myrank; 21751c6ba672SJunchao Zhang const PetscInt *ilocal; 21761c6ba672SJunchao Zhang const PetscSFNode *iremote; 21771c6ba672SJunchao Zhang PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 21781c6ba672SJunchao Zhang PetscSFNode *liremote; 21791c6ba672SJunchao Zhang PetscSF lsf; 21801c6ba672SJunchao Zhang PetscErrorCode ierr; 21811c6ba672SJunchao Zhang 21821c6ba672SJunchao Zhang PetscFunctionBegin; 21831c6ba672SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 21841c6ba672SJunchao Zhang if (sf->ops->CreateLocalSF) { 21851c6ba672SJunchao Zhang ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 21861c6ba672SJunchao Zhang } else { 21871c6ba672SJunchao Zhang /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 21881c6ba672SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2189ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr); 21901c6ba672SJunchao Zhang 21911c6ba672SJunchao Zhang /* Find out local edges and build a local SF */ 21921c6ba672SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 21931c6ba672SJunchao Zhang for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 21941c6ba672SJunchao Zhang ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 21951c6ba672SJunchao Zhang ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 21961c6ba672SJunchao Zhang 21971c6ba672SJunchao Zhang for (i=j=0; i<nleaves; i++) { 21981c6ba672SJunchao Zhang if (iremote[i].rank == (PetscInt)myrank) { 21991c6ba672SJunchao Zhang lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 22001c6ba672SJunchao Zhang liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 22011c6ba672SJunchao Zhang liremote[j].index = iremote[i].index; 22021c6ba672SJunchao Zhang j++; 22031c6ba672SJunchao Zhang } 22041c6ba672SJunchao Zhang } 22051c6ba672SJunchao Zhang ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 220620c24465SJunchao Zhang ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr); 22071c6ba672SJunchao Zhang ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 22081c6ba672SJunchao Zhang ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 22091c6ba672SJunchao Zhang *out = lsf; 22101c6ba672SJunchao Zhang } 22111c6ba672SJunchao Zhang PetscFunctionReturn(0); 22121c6ba672SJunchao Zhang } 2213dd5b3ca6SJunchao Zhang 2214dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2215dd5b3ca6SJunchao Zhang PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2216dd5b3ca6SJunchao Zhang { 2217dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 2218eb02082bSJunchao Zhang PetscMemType rootmtype,leafmtype; 2219dd5b3ca6SJunchao Zhang 2220dd5b3ca6SJunchao Zhang PetscFunctionBegin; 2221dd5b3ca6SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2222dd5b3ca6SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2223ad227feaSJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2224eb02082bSJunchao Zhang ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2225eb02082bSJunchao Zhang ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2226dd5b3ca6SJunchao Zhang if (sf->ops->BcastToZero) { 2227eb02082bSJunchao Zhang ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2228dd5b3ca6SJunchao Zhang } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2229ad227feaSJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2230dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 2231dd5b3ca6SJunchao Zhang } 2232dd5b3ca6SJunchao Zhang 2233