1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h> 353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 495fce210SBarry Smith #include <petscctable.h> 595fce210SBarry Smith 67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 77fd2d3dbSJunchao Zhang #include <cuda_runtime.h> 87fd2d3dbSJunchao Zhang #endif 97fd2d3dbSJunchao Zhang 107fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP) 117fd2d3dbSJunchao Zhang #include <hip/hip_runtime.h> 127fd2d3dbSJunchao Zhang #endif 137fd2d3dbSJunchao Zhang 142abc8c78SJacob Faibussowitsch #if defined(PETSC_CLANG_STATIC_ANALYZER) 152abc8c78SJacob Faibussowitsch void PetscSFCheckGraphSet(PetscSF, int); 162abc8c78SJacob Faibussowitsch #else 1795fce210SBarry Smith #if defined(PETSC_USE_DEBUG) 187a46b595SBarry Smith #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME); 1995fce210SBarry Smith #else 209371c9d4SSatish Balay #define PetscSFCheckGraphSet(sf, arg) \ 219371c9d4SSatish Balay do { \ 229371c9d4SSatish Balay } while (0) 2395fce210SBarry Smith #endif 242abc8c78SJacob Faibussowitsch #endif 2595fce210SBarry Smith 264c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 2795fce210SBarry Smith 288af6ec1cSBarry Smith /*@ 2995fce210SBarry Smith PetscSFCreate - create a star forest communication context 3095fce210SBarry Smith 31d083f849SBarry Smith Collective 3295fce210SBarry Smith 334165533cSJose E. Roman Input Parameter: 3495fce210SBarry Smith . comm - communicator on which the star forest will operate 3595fce210SBarry Smith 364165533cSJose E. Roman Output Parameter: 3795fce210SBarry Smith . sf - new star forest context 3895fce210SBarry Smith 39dd5b3ca6SJunchao Zhang Options Database Keys: 40dd5b3ca6SJunchao Zhang + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 41dd5b3ca6SJunchao Zhang . -sf_type window -Use MPI-3 one-sided window for communication 42dd5b3ca6SJunchao Zhang - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 43dd5b3ca6SJunchao Zhang 4495fce210SBarry Smith Level: intermediate 4595fce210SBarry Smith 46dd5b3ca6SJunchao Zhang Notes: 47dd5b3ca6SJunchao Zhang When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 48dd5b3ca6SJunchao Zhang MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 49dd5b3ca6SJunchao Zhang SFs are optimized and they have better performance than general SFs. 50dd5b3ca6SJunchao Zhang 51db781477SPatrick Sanan .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 5295fce210SBarry Smith @*/ 539371c9d4SSatish Balay PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) { 5495fce210SBarry Smith PetscSF b; 5595fce210SBarry Smith 5695fce210SBarry Smith PetscFunctionBegin; 5795fce210SBarry Smith PetscValidPointer(sf, 2); 589566063dSJacob Faibussowitsch PetscCall(PetscSFInitializePackage()); 5995fce210SBarry Smith 609566063dSJacob Faibussowitsch PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 6195fce210SBarry Smith 6295fce210SBarry Smith b->nroots = -1; 6395fce210SBarry Smith b->nleaves = -1; 6429046d53SLisandro Dalcin b->minleaf = PETSC_MAX_INT; 6529046d53SLisandro Dalcin b->maxleaf = PETSC_MIN_INT; 6695fce210SBarry Smith b->nranks = -1; 6795fce210SBarry Smith b->rankorder = PETSC_TRUE; 6895fce210SBarry Smith b->ingroup = MPI_GROUP_NULL; 6995fce210SBarry Smith b->outgroup = MPI_GROUP_NULL; 7095fce210SBarry Smith b->graphset = PETSC_FALSE; 7120c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 7220c24465SJunchao Zhang b->use_gpu_aware_mpi = use_gpu_aware_mpi; 7320c24465SJunchao Zhang b->use_stream_aware_mpi = PETSC_FALSE; 7471438e86SJunchao Zhang b->unknown_input_stream = PETSC_FALSE; 7527f636e8SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 7620c24465SJunchao Zhang b->backend = PETSCSF_BACKEND_KOKKOS; 7727f636e8SJunchao Zhang #elif defined(PETSC_HAVE_CUDA) 7827f636e8SJunchao Zhang b->backend = PETSCSF_BACKEND_CUDA; 7959af0bd3SScott Kruger #elif defined(PETSC_HAVE_HIP) 8059af0bd3SScott Kruger b->backend = PETSCSF_BACKEND_HIP; 8120c24465SJunchao Zhang #endif 8271438e86SJunchao Zhang 8371438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 8471438e86SJunchao Zhang b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 8571438e86SJunchao Zhang b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 869566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 879566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 8871438e86SJunchao Zhang #endif 8920c24465SJunchao Zhang #endif 9060c22052SBarry Smith b->vscat.from_n = -1; 9160c22052SBarry Smith b->vscat.to_n = -1; 9260c22052SBarry Smith b->vscat.unit = MPIU_SCALAR; 9395fce210SBarry Smith *sf = b; 9495fce210SBarry Smith PetscFunctionReturn(0); 9595fce210SBarry Smith } 9695fce210SBarry Smith 9729046d53SLisandro Dalcin /*@ 9895fce210SBarry Smith PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 9995fce210SBarry Smith 10095fce210SBarry Smith Collective 10195fce210SBarry Smith 1024165533cSJose E. Roman Input Parameter: 10395fce210SBarry Smith . sf - star forest 10495fce210SBarry Smith 10595fce210SBarry Smith Level: advanced 10695fce210SBarry Smith 107db781477SPatrick Sanan .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 10895fce210SBarry Smith @*/ 1099371c9d4SSatish Balay PetscErrorCode PetscSFReset(PetscSF sf) { 11095fce210SBarry Smith PetscFunctionBegin; 11195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 112dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Reset); 11329046d53SLisandro Dalcin sf->nroots = -1; 11429046d53SLisandro Dalcin sf->nleaves = -1; 11529046d53SLisandro Dalcin sf->minleaf = PETSC_MAX_INT; 11629046d53SLisandro Dalcin sf->maxleaf = PETSC_MIN_INT; 11795fce210SBarry Smith sf->mine = NULL; 11895fce210SBarry Smith sf->remote = NULL; 11929046d53SLisandro Dalcin sf->graphset = PETSC_FALSE; 1209566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->mine_alloc)); 1219566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->remote_alloc)); 12221c688dcSJed Brown sf->nranks = -1; 1239566063dSJacob Faibussowitsch PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 12429046d53SLisandro Dalcin sf->degreeknown = PETSC_FALSE; 1259566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->degree)); 1269566063dSJacob Faibussowitsch if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 1279566063dSJacob Faibussowitsch if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 128013b3241SStefano Zampini if (sf->multi) sf->multi->multi = NULL; 1299566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf->multi)); 1309566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sf->map)); 13171438e86SJunchao Zhang 13271438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 1339566063dSJacob Faibussowitsch for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 13471438e86SJunchao Zhang #endif 13571438e86SJunchao Zhang 13695fce210SBarry Smith sf->setupcalled = PETSC_FALSE; 13795fce210SBarry Smith PetscFunctionReturn(0); 13895fce210SBarry Smith } 13995fce210SBarry Smith 14095fce210SBarry Smith /*@C 14129046d53SLisandro Dalcin PetscSFSetType - Set the PetscSF communication implementation 14295fce210SBarry Smith 14395fce210SBarry Smith Collective on PetscSF 14495fce210SBarry Smith 14595fce210SBarry Smith Input Parameters: 14695fce210SBarry Smith + sf - the PetscSF context 14795fce210SBarry Smith - type - a known method 14895fce210SBarry Smith 14995fce210SBarry Smith Options Database Key: 15095fce210SBarry Smith . -sf_type <type> - Sets the method; use -help for a list 15170616304SStefano Zampini of available methods (for instance, window, basic, neighbor) 15295fce210SBarry Smith 15395fce210SBarry Smith Notes: 15495fce210SBarry Smith See "include/petscsf.h" for available methods (for instance) 15595fce210SBarry Smith + PETSCSFWINDOW - MPI-2/3 one-sided 15695fce210SBarry Smith - PETSCSFBASIC - basic implementation using MPI-1 two-sided 15795fce210SBarry Smith 15895fce210SBarry Smith Level: intermediate 15995fce210SBarry Smith 160db781477SPatrick Sanan .seealso: `PetscSFType`, `PetscSFCreate()` 16195fce210SBarry Smith @*/ 1629371c9d4SSatish Balay PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) { 16395fce210SBarry Smith PetscBool match; 1645f80ce2aSJacob Faibussowitsch PetscErrorCode (*r)(PetscSF); 16595fce210SBarry Smith 16695fce210SBarry Smith PetscFunctionBegin; 16795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16895fce210SBarry Smith PetscValidCharPointer(type, 2); 16995fce210SBarry Smith 1709566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 17195fce210SBarry Smith if (match) PetscFunctionReturn(0); 17295fce210SBarry Smith 1739566063dSJacob Faibussowitsch PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 17428b400f6SJacob Faibussowitsch PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 17529046d53SLisandro Dalcin /* Destroy the previous PetscSF implementation context */ 176dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Destroy); 1779566063dSJacob Faibussowitsch PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 1789566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 1799566063dSJacob Faibussowitsch PetscCall((*r)(sf)); 18095fce210SBarry Smith PetscFunctionReturn(0); 18195fce210SBarry Smith } 18295fce210SBarry Smith 18329046d53SLisandro Dalcin /*@C 18429046d53SLisandro Dalcin PetscSFGetType - Get the PetscSF communication implementation 18529046d53SLisandro Dalcin 18629046d53SLisandro Dalcin Not Collective 18729046d53SLisandro Dalcin 18829046d53SLisandro Dalcin Input Parameter: 18929046d53SLisandro Dalcin . sf - the PetscSF context 19029046d53SLisandro Dalcin 19129046d53SLisandro Dalcin Output Parameter: 19229046d53SLisandro Dalcin . type - the PetscSF type name 19329046d53SLisandro Dalcin 19429046d53SLisandro Dalcin Level: intermediate 19529046d53SLisandro Dalcin 196db781477SPatrick Sanan .seealso: `PetscSFSetType()`, `PetscSFCreate()` 19729046d53SLisandro Dalcin @*/ 1989371c9d4SSatish Balay PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) { 19929046d53SLisandro Dalcin PetscFunctionBegin; 20029046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 20129046d53SLisandro Dalcin PetscValidPointer(type, 2); 20229046d53SLisandro Dalcin *type = ((PetscObject)sf)->type_name; 20329046d53SLisandro Dalcin PetscFunctionReturn(0); 20429046d53SLisandro Dalcin } 20529046d53SLisandro Dalcin 2061fb7b255SJunchao Zhang /*@C 20795fce210SBarry Smith PetscSFDestroy - destroy star forest 20895fce210SBarry Smith 20995fce210SBarry Smith Collective 21095fce210SBarry Smith 2114165533cSJose E. Roman Input Parameter: 21295fce210SBarry Smith . sf - address of star forest 21395fce210SBarry Smith 21495fce210SBarry Smith Level: intermediate 21595fce210SBarry Smith 216db781477SPatrick Sanan .seealso: `PetscSFCreate()`, `PetscSFReset()` 21795fce210SBarry Smith @*/ 2189371c9d4SSatish Balay PetscErrorCode PetscSFDestroy(PetscSF *sf) { 21995fce210SBarry Smith PetscFunctionBegin; 22095fce210SBarry Smith if (!*sf) PetscFunctionReturn(0); 22195fce210SBarry Smith PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1); 2229371c9d4SSatish Balay if (--((PetscObject)(*sf))->refct > 0) { 2239371c9d4SSatish Balay *sf = NULL; 2249371c9d4SSatish Balay PetscFunctionReturn(0); 2259371c9d4SSatish Balay } 2269566063dSJacob Faibussowitsch PetscCall(PetscSFReset(*sf)); 227dbbe0bcdSBarry Smith PetscTryTypeMethod((*sf), Destroy); 2289566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 2299566063dSJacob Faibussowitsch if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 2309566063dSJacob Faibussowitsch PetscCall(PetscHeaderDestroy(sf)); 23195fce210SBarry Smith PetscFunctionReturn(0); 23295fce210SBarry Smith } 23395fce210SBarry Smith 2349371c9d4SSatish Balay static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) { 235c4e6a40aSLawrence Mitchell PetscInt i, nleaves; 236c4e6a40aSLawrence Mitchell PetscMPIInt size; 237c4e6a40aSLawrence Mitchell const PetscInt *ilocal; 238c4e6a40aSLawrence Mitchell const PetscSFNode *iremote; 239c4e6a40aSLawrence Mitchell 240c4e6a40aSLawrence Mitchell PetscFunctionBegin; 24176bd3646SJed Brown if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 2429566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 2439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 244c4e6a40aSLawrence Mitchell for (i = 0; i < nleaves; i++) { 245c4e6a40aSLawrence Mitchell const PetscInt rank = iremote[i].rank; 246c4e6a40aSLawrence Mitchell const PetscInt remote = iremote[i].index; 247c4e6a40aSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 248c9cc58a2SBarry Smith PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 24908401ef6SPierre Jolivet PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 25008401ef6SPierre Jolivet PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 251c4e6a40aSLawrence Mitchell } 252c4e6a40aSLawrence Mitchell PetscFunctionReturn(0); 253c4e6a40aSLawrence Mitchell } 254c4e6a40aSLawrence Mitchell 25595fce210SBarry Smith /*@ 25695fce210SBarry Smith PetscSFSetUp - set up communication structures 25795fce210SBarry Smith 25895fce210SBarry Smith Collective 25995fce210SBarry Smith 2604165533cSJose E. Roman Input Parameter: 26195fce210SBarry Smith . sf - star forest communication object 26295fce210SBarry Smith 26395fce210SBarry Smith Level: beginner 26495fce210SBarry Smith 265db781477SPatrick Sanan .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()` 26695fce210SBarry Smith @*/ 2679371c9d4SSatish Balay PetscErrorCode PetscSFSetUp(PetscSF sf) { 26895fce210SBarry Smith PetscFunctionBegin; 26929046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 27029046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 27195fce210SBarry Smith if (sf->setupcalled) PetscFunctionReturn(0); 2729566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 2739566063dSJacob Faibussowitsch PetscCall(PetscSFCheckGraphValid_Private(sf)); 2749566063dSJacob Faibussowitsch if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 275dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, SetUp); 27620c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 27720c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_CUDA) { 27871438e86SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_CUDA; 27971438e86SJunchao Zhang sf->ops->Free = PetscSFFree_CUDA; 28020c24465SJunchao Zhang } 28120c24465SJunchao Zhang #endif 28259af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 28359af0bd3SScott Kruger if (sf->backend == PETSCSF_BACKEND_HIP) { 28459af0bd3SScott Kruger sf->ops->Malloc = PetscSFMalloc_HIP; 28559af0bd3SScott Kruger sf->ops->Free = PetscSFFree_HIP; 28659af0bd3SScott Kruger } 28759af0bd3SScott Kruger #endif 28820c24465SJunchao Zhang 28959af0bd3SScott Kruger # 29020c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 29120c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 29220c24465SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_Kokkos; 29320c24465SJunchao Zhang sf->ops->Free = PetscSFFree_Kokkos; 29420c24465SJunchao Zhang } 29520c24465SJunchao Zhang #endif 2969566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 29795fce210SBarry Smith sf->setupcalled = PETSC_TRUE; 29895fce210SBarry Smith PetscFunctionReturn(0); 29995fce210SBarry Smith } 30095fce210SBarry Smith 3018af6ec1cSBarry Smith /*@ 30295fce210SBarry Smith PetscSFSetFromOptions - set PetscSF options using the options database 30395fce210SBarry Smith 30495fce210SBarry Smith Logically Collective 30595fce210SBarry Smith 3064165533cSJose E. Roman Input Parameter: 30795fce210SBarry Smith . sf - star forest 30895fce210SBarry Smith 30995fce210SBarry Smith Options Database Keys: 31060263706SJed Brown + -sf_type - implementation type, see PetscSFSetType() 31151ccb202SJunchao Zhang . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 312b85e67b7SJunchao Zhang . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 313c2a741eeSJunchao Zhang use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 314c06a8e02SRichard Tran Mills If true, this option only works with -use_gpu_aware_mpi 1. 31520c24465SJunchao Zhang . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 316c06a8e02SRichard Tran Mills If true, this option only works with -use_gpu_aware_mpi 1. 31795fce210SBarry Smith 31859af0bd3SScott Kruger - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 31959af0bd3SScott Kruger On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 32020c24465SJunchao Zhang the only available is kokkos. 32120c24465SJunchao Zhang 32295fce210SBarry Smith Level: intermediate 32395fce210SBarry Smith @*/ 3249371c9d4SSatish Balay PetscErrorCode PetscSFSetFromOptions(PetscSF sf) { 32595fce210SBarry Smith PetscSFType deft; 32695fce210SBarry Smith char type[256]; 32795fce210SBarry Smith PetscBool flg; 32895fce210SBarry Smith 32995fce210SBarry Smith PetscFunctionBegin; 33095fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 331d0609cedSBarry Smith PetscObjectOptionsBegin((PetscObject)sf); 33295fce210SBarry Smith deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 3339566063dSJacob Faibussowitsch PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 3349566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(sf, flg ? type : deft)); 3359566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 3367fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 33720c24465SJunchao Zhang { 33820c24465SJunchao Zhang char backendstr[32] = {0}; 33959af0bd3SScott Kruger PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 34020c24465SJunchao Zhang /* Change the defaults set in PetscSFCreate() with command line options */ 3419566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 3429566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 3439566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 3449566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 3459566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 3469566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 34759af0bd3SScott Kruger #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 34820c24465SJunchao Zhang if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 34920c24465SJunchao Zhang else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 35059af0bd3SScott Kruger else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 35128b400f6SJacob Faibussowitsch else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 35220c24465SJunchao Zhang #elif defined(PETSC_HAVE_KOKKOS) 35308401ef6SPierre Jolivet PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr); 35420c24465SJunchao Zhang #endif 35520c24465SJunchao Zhang } 356c2a741eeSJunchao Zhang #endif 357dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 358d0609cedSBarry Smith PetscOptionsEnd(); 35995fce210SBarry Smith PetscFunctionReturn(0); 36095fce210SBarry Smith } 36195fce210SBarry Smith 36229046d53SLisandro Dalcin /*@ 36395fce210SBarry Smith PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 36495fce210SBarry Smith 36595fce210SBarry Smith Logically Collective 36695fce210SBarry Smith 3674165533cSJose E. Roman Input Parameters: 36895fce210SBarry Smith + sf - star forest 36995fce210SBarry Smith - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 37095fce210SBarry Smith 37195fce210SBarry Smith Level: advanced 37295fce210SBarry Smith 373db781477SPatrick Sanan .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 37495fce210SBarry Smith @*/ 3759371c9d4SSatish Balay PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) { 37695fce210SBarry Smith PetscFunctionBegin; 37795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 37895fce210SBarry Smith PetscValidLogicalCollectiveBool(sf, flg, 2); 37928b400f6SJacob Faibussowitsch PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 38095fce210SBarry Smith sf->rankorder = flg; 38195fce210SBarry Smith PetscFunctionReturn(0); 38295fce210SBarry Smith } 38395fce210SBarry Smith 3848dbb0df6SBarry Smith /*@C 38595fce210SBarry Smith PetscSFSetGraph - Set a parallel star forest 38695fce210SBarry Smith 38795fce210SBarry Smith Collective 38895fce210SBarry Smith 3894165533cSJose E. Roman Input Parameters: 39095fce210SBarry Smith + sf - star forest 39195fce210SBarry Smith . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 39295fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 393c4e6a40aSLawrence Mitchell . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 394c4e6a40aSLawrence Mitchell during setup in debug mode) 39595fce210SBarry Smith . localmode - copy mode for ilocal 396c4e6a40aSLawrence Mitchell . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 397c4e6a40aSLawrence Mitchell during setup in debug mode) 39895fce210SBarry Smith - remotemode - copy mode for iremote 39995fce210SBarry Smith 40095fce210SBarry Smith Level: intermediate 40195fce210SBarry Smith 40295452b02SPatrick Sanan Notes: 403db2b9530SVaclav Hapla Leaf indices in ilocal must be unique, otherwise an error occurs. 40438ab3f8aSBarry Smith 405db2b9530SVaclav Hapla Input arrays ilocal and iremote follow the PetscCopyMode semantics. 406db2b9530SVaclav Hapla In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER, 407db2b9530SVaclav Hapla PETSc might modify the respective array; 408db2b9530SVaclav Hapla if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy(). 409e97d3de3SVaclav Hapla Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 410db2b9530SVaclav Hapla 411db2b9530SVaclav Hapla Fortran Notes: 412db2b9530SVaclav Hapla In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode. 413c4e6a40aSLawrence Mitchell 41472bf8598SVaclav Hapla Developer Notes: 415db2b9530SVaclav Hapla We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 416db2b9530SVaclav Hapla This also allows to compare leaf sets of two SFs easily. 41772bf8598SVaclav Hapla 418db781477SPatrick Sanan .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 41995fce210SBarry Smith @*/ 4209371c9d4SSatish Balay PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) { 421db2b9530SVaclav Hapla PetscBool unique, contiguous; 42295fce210SBarry Smith 42395fce210SBarry Smith PetscFunctionBegin; 42495fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 42529046d53SLisandro Dalcin if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4); 42629046d53SLisandro Dalcin if (nleaves > 0) PetscValidPointer(iremote, 6); 42708401ef6SPierre Jolivet PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 42808401ef6SPierre Jolivet PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 4298da24d32SBarry Smith /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 4308da24d32SBarry Smith * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 4318da24d32SBarry Smith PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 4328da24d32SBarry Smith PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 43329046d53SLisandro Dalcin 4342a67d2daSStefano Zampini if (sf->nroots >= 0) { /* Reset only if graph already set */ 4359566063dSJacob Faibussowitsch PetscCall(PetscSFReset(sf)); 4362a67d2daSStefano Zampini } 4372a67d2daSStefano Zampini 4389566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 43929046d53SLisandro Dalcin 44095fce210SBarry Smith sf->nroots = nroots; 44195fce210SBarry Smith sf->nleaves = nleaves; 44229046d53SLisandro Dalcin 443db2b9530SVaclav Hapla if (localmode == PETSC_COPY_VALUES && ilocal) { 444db2b9530SVaclav Hapla PetscInt *tlocal = NULL; 445db2b9530SVaclav Hapla 4469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &tlocal)); 4479566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 448db2b9530SVaclav Hapla ilocal = tlocal; 449db2b9530SVaclav Hapla } 450db2b9530SVaclav Hapla if (remotemode == PETSC_COPY_VALUES) { 451db2b9530SVaclav Hapla PetscSFNode *tremote = NULL; 452db2b9530SVaclav Hapla 4539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &tremote)); 4549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 455db2b9530SVaclav Hapla iremote = tremote; 456db2b9530SVaclav Hapla } 457db2b9530SVaclav Hapla 45829046d53SLisandro Dalcin if (nleaves && ilocal) { 459db2b9530SVaclav Hapla PetscSFNode work; 460db2b9530SVaclav Hapla 4619566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 4629566063dSJacob Faibussowitsch PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 463db2b9530SVaclav Hapla unique = PetscNot(unique); 464db2b9530SVaclav Hapla PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 465db2b9530SVaclav Hapla sf->minleaf = ilocal[0]; 466db2b9530SVaclav Hapla sf->maxleaf = ilocal[nleaves - 1]; 467db2b9530SVaclav Hapla contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 46829046d53SLisandro Dalcin } else { 46929046d53SLisandro Dalcin sf->minleaf = 0; 47029046d53SLisandro Dalcin sf->maxleaf = nleaves - 1; 471db2b9530SVaclav Hapla unique = PETSC_TRUE; 472db2b9530SVaclav Hapla contiguous = PETSC_TRUE; 47329046d53SLisandro Dalcin } 47429046d53SLisandro Dalcin 475db2b9530SVaclav Hapla if (contiguous) { 476db2b9530SVaclav Hapla if (localmode == PETSC_USE_POINTER) { 477db2b9530SVaclav Hapla ilocal = NULL; 478db2b9530SVaclav Hapla } else { 4799566063dSJacob Faibussowitsch PetscCall(PetscFree(ilocal)); 480db2b9530SVaclav Hapla } 481db2b9530SVaclav Hapla } 482db2b9530SVaclav Hapla sf->mine = ilocal; 483db2b9530SVaclav Hapla if (localmode == PETSC_USE_POINTER) { 48429046d53SLisandro Dalcin sf->mine_alloc = NULL; 485db2b9530SVaclav Hapla } else { 486db2b9530SVaclav Hapla sf->mine_alloc = ilocal; 48795fce210SBarry Smith } 488db2b9530SVaclav Hapla sf->remote = iremote; 489db2b9530SVaclav Hapla if (remotemode == PETSC_USE_POINTER) { 49029046d53SLisandro Dalcin sf->remote_alloc = NULL; 491db2b9530SVaclav Hapla } else { 492db2b9530SVaclav Hapla sf->remote_alloc = iremote; 49395fce210SBarry Smith } 4949566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 49529046d53SLisandro Dalcin sf->graphset = PETSC_TRUE; 49695fce210SBarry Smith PetscFunctionReturn(0); 49795fce210SBarry Smith } 49895fce210SBarry Smith 49929046d53SLisandro Dalcin /*@ 500dd5b3ca6SJunchao Zhang PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 501dd5b3ca6SJunchao Zhang 502dd5b3ca6SJunchao Zhang Collective 503dd5b3ca6SJunchao Zhang 504dd5b3ca6SJunchao Zhang Input Parameters: 505dd5b3ca6SJunchao Zhang + sf - The PetscSF 506dd5b3ca6SJunchao Zhang . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 507dd5b3ca6SJunchao Zhang - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 508dd5b3ca6SJunchao Zhang 509dd5b3ca6SJunchao Zhang Notes: 510dd5b3ca6SJunchao Zhang It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 511dd5b3ca6SJunchao Zhang n and N are local and global sizes of x respectively. 512dd5b3ca6SJunchao Zhang 513dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 514dd5b3ca6SJunchao Zhang sequential vectors y on all ranks. 515dd5b3ca6SJunchao Zhang 516dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 517dd5b3ca6SJunchao Zhang sequential vector y on rank 0. 518dd5b3ca6SJunchao Zhang 519dd5b3ca6SJunchao Zhang In above cases, entries of x are roots and entries of y are leaves. 520dd5b3ca6SJunchao Zhang 521dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 522dd5b3ca6SJunchao Zhang creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 523dd5b3ca6SJunchao Zhang of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 524dd5b3ca6SJunchao Zhang not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 525dd5b3ca6SJunchao Zhang items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 526dd5b3ca6SJunchao Zhang 527dd5b3ca6SJunchao Zhang In this case, roots and leaves are symmetric. 528dd5b3ca6SJunchao Zhang 529dd5b3ca6SJunchao Zhang Level: intermediate 530dd5b3ca6SJunchao Zhang @*/ 5319371c9d4SSatish Balay PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) { 532dd5b3ca6SJunchao Zhang MPI_Comm comm; 533dd5b3ca6SJunchao Zhang PetscInt n, N, res[2]; 534dd5b3ca6SJunchao Zhang PetscMPIInt rank, size; 535dd5b3ca6SJunchao Zhang PetscSFType type; 536dd5b3ca6SJunchao Zhang 537dd5b3ca6SJunchao Zhang PetscFunctionBegin; 5382abc8c78SJacob Faibussowitsch PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 5392abc8c78SJacob Faibussowitsch if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2); 5409566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 5412c71b3e2SJacob Faibussowitsch PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 5429566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 544dd5b3ca6SJunchao Zhang 545dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLTOALL) { 546dd5b3ca6SJunchao Zhang type = PETSCSFALLTOALL; 5479566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &sf->map)); 5489566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 5499566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size)); 5509566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(sf->map)); 551dd5b3ca6SJunchao Zhang } else { 5529566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetLocalSize(map, &n)); 5539566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetSize(map, &N)); 554dd5b3ca6SJunchao Zhang res[0] = n; 555dd5b3ca6SJunchao Zhang res[1] = -n; 556dd5b3ca6SJunchao Zhang /* Check if n are same over all ranks so that we can optimize it */ 5571c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 558dd5b3ca6SJunchao Zhang if (res[0] == -res[1]) { /* same n */ 559dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 560dd5b3ca6SJunchao Zhang } else { 561dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 562dd5b3ca6SJunchao Zhang } 5639566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(map, &sf->map)); 564dd5b3ca6SJunchao Zhang } 5659566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(sf, type)); 566dd5b3ca6SJunchao Zhang 567dd5b3ca6SJunchao Zhang sf->pattern = pattern; 568dd5b3ca6SJunchao Zhang sf->mine = NULL; /* Contiguous */ 569dd5b3ca6SJunchao Zhang 570dd5b3ca6SJunchao Zhang /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 571dd5b3ca6SJunchao Zhang Also set other easy stuff. 572dd5b3ca6SJunchao Zhang */ 573dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLGATHER) { 574dd5b3ca6SJunchao Zhang sf->nleaves = N; 575dd5b3ca6SJunchao Zhang sf->nroots = n; 576dd5b3ca6SJunchao Zhang sf->nranks = size; 577dd5b3ca6SJunchao Zhang sf->minleaf = 0; 578dd5b3ca6SJunchao Zhang sf->maxleaf = N - 1; 579dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_GATHER) { 580dd5b3ca6SJunchao Zhang sf->nleaves = rank ? 0 : N; 581dd5b3ca6SJunchao Zhang sf->nroots = n; 582dd5b3ca6SJunchao Zhang sf->nranks = rank ? 0 : size; 583dd5b3ca6SJunchao Zhang sf->minleaf = 0; 584dd5b3ca6SJunchao Zhang sf->maxleaf = rank ? -1 : N - 1; 585dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 586dd5b3ca6SJunchao Zhang sf->nleaves = size; 587dd5b3ca6SJunchao Zhang sf->nroots = size; 588dd5b3ca6SJunchao Zhang sf->nranks = size; 589dd5b3ca6SJunchao Zhang sf->minleaf = 0; 590dd5b3ca6SJunchao Zhang sf->maxleaf = size - 1; 591dd5b3ca6SJunchao Zhang } 592dd5b3ca6SJunchao Zhang sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 593dd5b3ca6SJunchao Zhang sf->graphset = PETSC_TRUE; 594dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 595dd5b3ca6SJunchao Zhang } 596dd5b3ca6SJunchao Zhang 597dd5b3ca6SJunchao Zhang /*@ 59895fce210SBarry Smith PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 59995fce210SBarry Smith 60095fce210SBarry Smith Collective 60195fce210SBarry Smith 6024165533cSJose E. Roman Input Parameter: 60395fce210SBarry Smith . sf - star forest to invert 60495fce210SBarry Smith 6054165533cSJose E. Roman Output Parameter: 60695fce210SBarry Smith . isf - inverse of sf 6074165533cSJose E. Roman 60895fce210SBarry Smith Level: advanced 60995fce210SBarry Smith 61095fce210SBarry Smith Notes: 61195fce210SBarry Smith All roots must have degree 1. 61295fce210SBarry Smith 61395fce210SBarry Smith The local space may be a permutation, but cannot be sparse. 61495fce210SBarry Smith 615db781477SPatrick Sanan .seealso: `PetscSFSetGraph()` 61695fce210SBarry Smith @*/ 6179371c9d4SSatish Balay PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) { 61895fce210SBarry Smith PetscMPIInt rank; 61995fce210SBarry Smith PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 62095fce210SBarry Smith const PetscInt *ilocal; 62195fce210SBarry Smith PetscSFNode *roots, *leaves; 62295fce210SBarry Smith 62395fce210SBarry Smith PetscFunctionBegin; 62429046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 62529046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 62629046d53SLisandro Dalcin PetscValidPointer(isf, 2); 62729046d53SLisandro Dalcin 6289566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 62929046d53SLisandro Dalcin maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 63029046d53SLisandro Dalcin 6319566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 6329566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 633ae9aee6dSMatthew G. Knepley for (i = 0; i < maxlocal; i++) { 63495fce210SBarry Smith leaves[i].rank = rank; 63595fce210SBarry Smith leaves[i].index = i; 63695fce210SBarry Smith } 63795fce210SBarry Smith for (i = 0; i < nroots; i++) { 63895fce210SBarry Smith roots[i].rank = -1; 63995fce210SBarry Smith roots[i].index = -1; 64095fce210SBarry Smith } 6419566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 6429566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 64395fce210SBarry Smith 64495fce210SBarry Smith /* Check whether our leaves are sparse */ 6459371c9d4SSatish Balay for (i = 0, count = 0; i < nroots; i++) 6469371c9d4SSatish Balay if (roots[i].rank >= 0) count++; 64795fce210SBarry Smith if (count == nroots) newilocal = NULL; 6489371c9d4SSatish Balay else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 64995fce210SBarry Smith for (i = 0, count = 0; i < nroots; i++) { 65095fce210SBarry Smith if (roots[i].rank >= 0) { 65195fce210SBarry Smith newilocal[count] = i; 65295fce210SBarry Smith roots[count].rank = roots[i].rank; 65395fce210SBarry Smith roots[count].index = roots[i].index; 65495fce210SBarry Smith count++; 65595fce210SBarry Smith } 65695fce210SBarry Smith } 65795fce210SBarry Smith } 65895fce210SBarry Smith 6599566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 6609566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 6619566063dSJacob Faibussowitsch PetscCall(PetscFree2(roots, leaves)); 66295fce210SBarry Smith PetscFunctionReturn(0); 66395fce210SBarry Smith } 66495fce210SBarry Smith 66595fce210SBarry Smith /*@ 66695fce210SBarry Smith PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 66795fce210SBarry Smith 66895fce210SBarry Smith Collective 66995fce210SBarry Smith 6704165533cSJose E. Roman Input Parameters: 67195fce210SBarry Smith + sf - communication object to duplicate 67295fce210SBarry Smith - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 67395fce210SBarry Smith 6744165533cSJose E. Roman Output Parameter: 67595fce210SBarry Smith . newsf - new communication object 67695fce210SBarry Smith 67795fce210SBarry Smith Level: beginner 67895fce210SBarry Smith 679db781477SPatrick Sanan .seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 68095fce210SBarry Smith @*/ 6819371c9d4SSatish Balay PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) { 68229046d53SLisandro Dalcin PetscSFType type; 68397929ea7SJunchao Zhang MPI_Datatype dtype = MPIU_SCALAR; 68495fce210SBarry Smith 68595fce210SBarry Smith PetscFunctionBegin; 68629046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 68729046d53SLisandro Dalcin PetscValidLogicalCollectiveEnum(sf, opt, 2); 68829046d53SLisandro Dalcin PetscValidPointer(newsf, 3); 6899566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 6909566063dSJacob Faibussowitsch PetscCall(PetscSFGetType(sf, &type)); 6919566063dSJacob Faibussowitsch if (type) PetscCall(PetscSFSetType(*newsf, type)); 69254a24607SJunchao Zhang (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag ealier since PetscSFSetGraph() below checks on this flag */ 69395fce210SBarry Smith if (opt == PETSCSF_DUPLICATE_GRAPH) { 694dd5b3ca6SJunchao Zhang PetscSFCheckGraphSet(sf, 1); 695dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 69695fce210SBarry Smith PetscInt nroots, nleaves; 69795fce210SBarry Smith const PetscInt *ilocal; 69895fce210SBarry Smith const PetscSFNode *iremote; 6999566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 7009566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 701dd5b3ca6SJunchao Zhang } else { 7029566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 703dd5b3ca6SJunchao Zhang } 70495fce210SBarry Smith } 70597929ea7SJunchao Zhang /* Since oldtype is committed, so is newtype, according to MPI */ 7069566063dSJacob Faibussowitsch if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 70797929ea7SJunchao Zhang (*newsf)->vscat.bs = sf->vscat.bs; 70897929ea7SJunchao Zhang (*newsf)->vscat.unit = dtype; 70997929ea7SJunchao Zhang (*newsf)->vscat.to_n = sf->vscat.to_n; 71097929ea7SJunchao Zhang (*newsf)->vscat.from_n = sf->vscat.from_n; 71197929ea7SJunchao Zhang /* Do not copy lsf. Build it on demand since it is rarely used */ 71297929ea7SJunchao Zhang 71320c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 71420c24465SJunchao Zhang (*newsf)->backend = sf->backend; 71571438e86SJunchao Zhang (*newsf)->unknown_input_stream = sf->unknown_input_stream; 71620c24465SJunchao Zhang (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 71720c24465SJunchao Zhang (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 71820c24465SJunchao Zhang #endif 719dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 72020c24465SJunchao Zhang /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 72195fce210SBarry Smith PetscFunctionReturn(0); 72295fce210SBarry Smith } 72395fce210SBarry Smith 72495fce210SBarry Smith /*@C 72595fce210SBarry Smith PetscSFGetGraph - Get the graph specifying a parallel star forest 72695fce210SBarry Smith 72795fce210SBarry Smith Not Collective 72895fce210SBarry Smith 7294165533cSJose E. Roman Input Parameter: 73095fce210SBarry Smith . sf - star forest 73195fce210SBarry Smith 7324165533cSJose E. Roman Output Parameters: 73395fce210SBarry Smith + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 73495fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 735bc6585dcSJunchao Zhang . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 73695fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process 73795fce210SBarry Smith 738373e0d91SLisandro Dalcin Notes: 739373e0d91SLisandro Dalcin We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 740373e0d91SLisandro Dalcin 741db2b9530SVaclav Hapla The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(), 742db2b9530SVaclav Hapla see its manpage for details. 743db2b9530SVaclav Hapla 7448dbb0df6SBarry Smith Fortran Notes: 7458dbb0df6SBarry Smith The returned iremote array is a copy and must be deallocated after use. Consequently, if you 7468dbb0df6SBarry Smith want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 7478dbb0df6SBarry Smith 7488dbb0df6SBarry Smith To check for a NULL ilocal use 7498dbb0df6SBarry Smith $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 750ca797d7aSLawrence Mitchell 75195fce210SBarry Smith Level: intermediate 75295fce210SBarry Smith 753db781477SPatrick Sanan .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 75495fce210SBarry Smith @*/ 7559371c9d4SSatish Balay PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) { 75695fce210SBarry Smith PetscFunctionBegin; 75795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 758b8dee149SJunchao Zhang if (sf->ops->GetGraph) { 7599566063dSJacob Faibussowitsch PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote)); 760b8dee149SJunchao Zhang } else { 76195fce210SBarry Smith if (nroots) *nroots = sf->nroots; 76295fce210SBarry Smith if (nleaves) *nleaves = sf->nleaves; 76395fce210SBarry Smith if (ilocal) *ilocal = sf->mine; 76495fce210SBarry Smith if (iremote) *iremote = sf->remote; 765b8dee149SJunchao Zhang } 76695fce210SBarry Smith PetscFunctionReturn(0); 76795fce210SBarry Smith } 76895fce210SBarry Smith 76929046d53SLisandro Dalcin /*@ 77095fce210SBarry Smith PetscSFGetLeafRange - Get the active leaf ranges 77195fce210SBarry Smith 77295fce210SBarry Smith Not Collective 77395fce210SBarry Smith 7744165533cSJose E. Roman Input Parameter: 77595fce210SBarry Smith . sf - star forest 77695fce210SBarry Smith 7774165533cSJose E. Roman Output Parameters: 778dd5b3ca6SJunchao Zhang + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 779dd5b3ca6SJunchao Zhang - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 78095fce210SBarry Smith 78195fce210SBarry Smith Level: developer 78295fce210SBarry Smith 783db781477SPatrick Sanan .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 78495fce210SBarry Smith @*/ 7859371c9d4SSatish Balay PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) { 78695fce210SBarry Smith PetscFunctionBegin; 78795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 78829046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 78995fce210SBarry Smith if (minleaf) *minleaf = sf->minleaf; 79095fce210SBarry Smith if (maxleaf) *maxleaf = sf->maxleaf; 79195fce210SBarry Smith PetscFunctionReturn(0); 79295fce210SBarry Smith } 79395fce210SBarry Smith 79495fce210SBarry Smith /*@C 795fe2efc57SMark PetscSFViewFromOptions - View from Options 796fe2efc57SMark 797fe2efc57SMark Collective on PetscSF 798fe2efc57SMark 799fe2efc57SMark Input Parameters: 800fe2efc57SMark + A - the star forest 801736c3998SJose E. Roman . obj - Optional object 802736c3998SJose E. Roman - name - command line option 803fe2efc57SMark 804fe2efc57SMark Level: intermediate 805db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 806fe2efc57SMark @*/ 8079371c9d4SSatish Balay PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) { 808fe2efc57SMark PetscFunctionBegin; 809fe2efc57SMark PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 8109566063dSJacob Faibussowitsch PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 811fe2efc57SMark PetscFunctionReturn(0); 812fe2efc57SMark } 813fe2efc57SMark 814fe2efc57SMark /*@C 81595fce210SBarry Smith PetscSFView - view a star forest 81695fce210SBarry Smith 81795fce210SBarry Smith Collective 81895fce210SBarry Smith 8194165533cSJose E. Roman Input Parameters: 82095fce210SBarry Smith + sf - star forest 82195fce210SBarry Smith - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 82295fce210SBarry Smith 82395fce210SBarry Smith Level: beginner 82495fce210SBarry Smith 825db781477SPatrick Sanan .seealso: `PetscSFCreate()`, `PetscSFSetGraph()` 82695fce210SBarry Smith @*/ 8279371c9d4SSatish Balay PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) { 82895fce210SBarry Smith PetscBool iascii; 82995fce210SBarry Smith PetscViewerFormat format; 83095fce210SBarry Smith 83195fce210SBarry Smith PetscFunctionBegin; 83295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 8339566063dSJacob Faibussowitsch if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 83495fce210SBarry Smith PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 83595fce210SBarry Smith PetscCheckSameComm(sf, 1, viewer, 2); 8369566063dSJacob Faibussowitsch if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 8379566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 83853dd6d7dSJunchao Zhang if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 83995fce210SBarry Smith PetscMPIInt rank; 84081bfa7aaSJed Brown PetscInt ii, i, j; 84195fce210SBarry Smith 8429566063dSJacob Faibussowitsch PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 8439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushTab(viewer)); 844dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 84580153354SVaclav Hapla if (!sf->graphset) { 8469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 8479566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer)); 84880153354SVaclav Hapla PetscFunctionReturn(0); 84980153354SVaclav Hapla } 8509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 8519566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 8529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 853*48a46eb9SPierre Jolivet for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 8549566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 8559566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 85695fce210SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 85781bfa7aaSJed Brown PetscMPIInt *tmpranks, *perm; 8589566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 8599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 86081bfa7aaSJed Brown for (i = 0; i < sf->nranks; i++) perm[i] = i; 8619566063dSJacob Faibussowitsch PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 8629566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 86381bfa7aaSJed Brown for (ii = 0; ii < sf->nranks; ii++) { 86481bfa7aaSJed Brown i = perm[ii]; 8659566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 866*48a46eb9SPierre Jolivet for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 86795fce210SBarry Smith } 8689566063dSJacob Faibussowitsch PetscCall(PetscFree2(tmpranks, perm)); 86995fce210SBarry Smith } 8709566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 8719566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 872dd5b3ca6SJunchao Zhang } 8739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer)); 87495fce210SBarry Smith } 875dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, View, viewer); 87695fce210SBarry Smith PetscFunctionReturn(0); 87795fce210SBarry Smith } 87895fce210SBarry Smith 87995fce210SBarry Smith /*@C 880dec1416fSJunchao Zhang PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 88195fce210SBarry Smith 88295fce210SBarry Smith Not Collective 88395fce210SBarry Smith 8844165533cSJose E. Roman Input Parameter: 88595fce210SBarry Smith . sf - star forest 88695fce210SBarry Smith 8874165533cSJose E. Roman Output Parameters: 88895fce210SBarry Smith + nranks - number of ranks referenced by local part 88995fce210SBarry Smith . ranks - array of ranks 89095fce210SBarry Smith . roffset - offset in rmine/rremote for each rank (length nranks+1) 89195fce210SBarry Smith . rmine - concatenated array holding local indices referencing each remote rank 89295fce210SBarry Smith - rremote - concatenated array holding remote indices referenced for each remote rank 89395fce210SBarry Smith 89495fce210SBarry Smith Level: developer 89595fce210SBarry Smith 896db781477SPatrick Sanan .seealso: `PetscSFGetLeafRanks()` 89795fce210SBarry Smith @*/ 8989371c9d4SSatish Balay PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) { 89995fce210SBarry Smith PetscFunctionBegin; 90095fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 90128b400f6SJacob Faibussowitsch PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 902dec1416fSJunchao Zhang if (sf->ops->GetRootRanks) { 9039566063dSJacob Faibussowitsch PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote)); 904dec1416fSJunchao Zhang } else { 905dec1416fSJunchao Zhang /* The generic implementation */ 90695fce210SBarry Smith if (nranks) *nranks = sf->nranks; 90795fce210SBarry Smith if (ranks) *ranks = sf->ranks; 90895fce210SBarry Smith if (roffset) *roffset = sf->roffset; 90995fce210SBarry Smith if (rmine) *rmine = sf->rmine; 91095fce210SBarry Smith if (rremote) *rremote = sf->rremote; 911dec1416fSJunchao Zhang } 91295fce210SBarry Smith PetscFunctionReturn(0); 91395fce210SBarry Smith } 91495fce210SBarry Smith 9158750ddebSJunchao Zhang /*@C 9168750ddebSJunchao Zhang PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 9178750ddebSJunchao Zhang 9188750ddebSJunchao Zhang Not Collective 9198750ddebSJunchao Zhang 9204165533cSJose E. Roman Input Parameter: 9218750ddebSJunchao Zhang . sf - star forest 9228750ddebSJunchao Zhang 9234165533cSJose E. Roman Output Parameters: 9248750ddebSJunchao Zhang + niranks - number of leaf ranks referencing roots on this process 9258750ddebSJunchao Zhang . iranks - array of ranks 9268750ddebSJunchao Zhang . ioffset - offset in irootloc for each rank (length niranks+1) 9278750ddebSJunchao Zhang - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 9288750ddebSJunchao Zhang 9298750ddebSJunchao Zhang Level: developer 9308750ddebSJunchao Zhang 931db781477SPatrick Sanan .seealso: `PetscSFGetRootRanks()` 9328750ddebSJunchao Zhang @*/ 9339371c9d4SSatish Balay PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) { 9348750ddebSJunchao Zhang PetscFunctionBegin; 9358750ddebSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 93628b400f6SJacob Faibussowitsch PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 9378750ddebSJunchao Zhang if (sf->ops->GetLeafRanks) { 9389566063dSJacob Faibussowitsch PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc)); 9398750ddebSJunchao Zhang } else { 9408750ddebSJunchao Zhang PetscSFType type; 9419566063dSJacob Faibussowitsch PetscCall(PetscSFGetType(sf, &type)); 94298921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 9438750ddebSJunchao Zhang } 9448750ddebSJunchao Zhang PetscFunctionReturn(0); 9458750ddebSJunchao Zhang } 9468750ddebSJunchao Zhang 947b5a8e515SJed Brown static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) { 948b5a8e515SJed Brown PetscInt i; 949b5a8e515SJed Brown for (i = 0; i < n; i++) { 950b5a8e515SJed Brown if (needle == list[i]) return PETSC_TRUE; 951b5a8e515SJed Brown } 952b5a8e515SJed Brown return PETSC_FALSE; 953b5a8e515SJed Brown } 954b5a8e515SJed Brown 95595fce210SBarry Smith /*@C 95621c688dcSJed Brown PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 95721c688dcSJed Brown 95821c688dcSJed Brown Collective 95921c688dcSJed Brown 9604165533cSJose E. Roman Input Parameters: 961b5a8e515SJed Brown + sf - PetscSF to set up; PetscSFSetGraph() must have been called 962b5a8e515SJed Brown - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 96321c688dcSJed Brown 96421c688dcSJed Brown Level: developer 96521c688dcSJed Brown 966db781477SPatrick Sanan .seealso: `PetscSFGetRootRanks()` 96721c688dcSJed Brown @*/ 9689371c9d4SSatish Balay PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) { 96921c688dcSJed Brown PetscTable table; 97021c688dcSJed Brown PetscTablePosition pos; 971b5a8e515SJed Brown PetscMPIInt size, groupsize, *groupranks; 972247e8311SStefano Zampini PetscInt *rcount, *ranks; 973247e8311SStefano Zampini PetscInt i, irank = -1, orank = -1; 97421c688dcSJed Brown 97521c688dcSJed Brown PetscFunctionBegin; 97621c688dcSJed Brown PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 97729046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 9789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 9799566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(10, size, &table)); 98021c688dcSJed Brown for (i = 0; i < sf->nleaves; i++) { 98121c688dcSJed Brown /* Log 1-based rank */ 9829566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 98321c688dcSJed Brown } 9849566063dSJacob Faibussowitsch PetscCall(PetscTableGetCount(table, &sf->nranks)); 9859566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 9869566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 9879566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(table, &pos)); 98821c688dcSJed Brown for (i = 0; i < sf->nranks; i++) { 9899566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i])); 99021c688dcSJed Brown ranks[i]--; /* Convert back to 0-based */ 99121c688dcSJed Brown } 9929566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&table)); 993b5a8e515SJed Brown 994b5a8e515SJed Brown /* We expect that dgroup is reliably "small" while nranks could be large */ 995b5a8e515SJed Brown { 9967fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 997b5a8e515SJed Brown PetscMPIInt *dgroupranks; 9989566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 9999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 10009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 10019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(groupsize, &groupranks)); 1002b5a8e515SJed Brown for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 10039566063dSJacob Faibussowitsch if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 10049566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 10059566063dSJacob Faibussowitsch PetscCall(PetscFree(dgroupranks)); 1006b5a8e515SJed Brown } 1007b5a8e515SJed Brown 1008b5a8e515SJed Brown /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1009b5a8e515SJed Brown for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1010b5a8e515SJed Brown for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1011b5a8e515SJed Brown if (InList(ranks[i], groupsize, groupranks)) break; 1012b5a8e515SJed Brown } 1013b5a8e515SJed Brown for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1014b5a8e515SJed Brown if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1015b5a8e515SJed Brown } 1016b5a8e515SJed Brown if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1017b5a8e515SJed Brown PetscInt tmprank, tmpcount; 1018247e8311SStefano Zampini 1019b5a8e515SJed Brown tmprank = ranks[i]; 1020b5a8e515SJed Brown tmpcount = rcount[i]; 1021b5a8e515SJed Brown ranks[i] = ranks[sf->ndranks]; 1022b5a8e515SJed Brown rcount[i] = rcount[sf->ndranks]; 1023b5a8e515SJed Brown ranks[sf->ndranks] = tmprank; 1024b5a8e515SJed Brown rcount[sf->ndranks] = tmpcount; 1025b5a8e515SJed Brown sf->ndranks++; 1026b5a8e515SJed Brown } 1027b5a8e515SJed Brown } 10289566063dSJacob Faibussowitsch PetscCall(PetscFree(groupranks)); 10299566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount)); 10309566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 103121c688dcSJed Brown sf->roffset[0] = 0; 103221c688dcSJed Brown for (i = 0; i < sf->nranks; i++) { 10339566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 103421c688dcSJed Brown sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 103521c688dcSJed Brown rcount[i] = 0; 103621c688dcSJed Brown } 1037247e8311SStefano Zampini for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1038247e8311SStefano Zampini /* short circuit */ 1039247e8311SStefano Zampini if (orank != sf->remote[i].rank) { 104021c688dcSJed Brown /* Search for index of iremote[i].rank in sf->ranks */ 10419566063dSJacob Faibussowitsch PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1042b5a8e515SJed Brown if (irank < 0) { 10439566063dSJacob Faibussowitsch PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1044b5a8e515SJed Brown if (irank >= 0) irank += sf->ndranks; 104521c688dcSJed Brown } 1046247e8311SStefano Zampini orank = sf->remote[i].rank; 1047247e8311SStefano Zampini } 104808401ef6SPierre Jolivet PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank); 104921c688dcSJed Brown sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 105021c688dcSJed Brown sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 105121c688dcSJed Brown rcount[irank]++; 105221c688dcSJed Brown } 10539566063dSJacob Faibussowitsch PetscCall(PetscFree2(rcount, ranks)); 105421c688dcSJed Brown PetscFunctionReturn(0); 105521c688dcSJed Brown } 105621c688dcSJed Brown 105721c688dcSJed Brown /*@C 105895fce210SBarry Smith PetscSFGetGroups - gets incoming and outgoing process groups 105995fce210SBarry Smith 106095fce210SBarry Smith Collective 106195fce210SBarry Smith 10624165533cSJose E. Roman Input Parameter: 106395fce210SBarry Smith . sf - star forest 106495fce210SBarry Smith 10654165533cSJose E. Roman Output Parameters: 106695fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots) 106795fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference) 106895fce210SBarry Smith 106995fce210SBarry Smith Level: developer 107095fce210SBarry Smith 1071db781477SPatrick Sanan .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 107295fce210SBarry Smith @*/ 10739371c9d4SSatish Balay PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) { 10747fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 107595fce210SBarry Smith 107695fce210SBarry Smith PetscFunctionBegin; 107708401ef6SPierre Jolivet PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 107895fce210SBarry Smith if (sf->ingroup == MPI_GROUP_NULL) { 107995fce210SBarry Smith PetscInt i; 108095fce210SBarry Smith const PetscInt *indegree; 108195fce210SBarry Smith PetscMPIInt rank, *outranks, *inranks; 108295fce210SBarry Smith PetscSFNode *remote; 108395fce210SBarry Smith PetscSF bgcount; 108495fce210SBarry Smith 108595fce210SBarry Smith /* Compute the number of incoming ranks */ 10869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nranks, &remote)); 108795fce210SBarry Smith for (i = 0; i < sf->nranks; i++) { 108895fce210SBarry Smith remote[i].rank = sf->ranks[i]; 108995fce210SBarry Smith remote[i].index = 0; 109095fce210SBarry Smith } 10919566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 10929566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 10939566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 10949566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 109595fce210SBarry Smith /* Enumerate the incoming ranks */ 10969566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 10979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 109895fce210SBarry Smith for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 10999566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 11009566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 11019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 11029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup)); 11039566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 11049566063dSJacob Faibussowitsch PetscCall(PetscFree2(inranks, outranks)); 11059566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&bgcount)); 110695fce210SBarry Smith } 110795fce210SBarry Smith *incoming = sf->ingroup; 110895fce210SBarry Smith 110995fce210SBarry Smith if (sf->outgroup == MPI_GROUP_NULL) { 11109566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 11119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 11129566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 111395fce210SBarry Smith } 111495fce210SBarry Smith *outgoing = sf->outgroup; 111595fce210SBarry Smith PetscFunctionReturn(0); 111695fce210SBarry Smith } 111795fce210SBarry Smith 111829046d53SLisandro Dalcin /*@ 11193b8d980fSPierre Jolivet PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 112095fce210SBarry Smith 112195fce210SBarry Smith Collective 112295fce210SBarry Smith 11234165533cSJose E. Roman Input Parameter: 112495fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex 112595fce210SBarry Smith 11264165533cSJose E. Roman Output Parameter: 112795fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1 112895fce210SBarry Smith 112995fce210SBarry Smith Level: developer 113095fce210SBarry Smith 113195fce210SBarry Smith Notes: 113295fce210SBarry Smith 113395fce210SBarry Smith In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 113495fce210SBarry Smith directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 113595fce210SBarry Smith edge, it is a candidate for future optimization that might involve its removal. 113695fce210SBarry Smith 1137db781477SPatrick Sanan .seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 113895fce210SBarry Smith @*/ 11399371c9d4SSatish Balay PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) { 114095fce210SBarry Smith PetscFunctionBegin; 114195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 114295fce210SBarry Smith PetscValidPointer(multi, 2); 114395fce210SBarry Smith if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 11449566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 114595fce210SBarry Smith *multi = sf->multi; 1146013b3241SStefano Zampini sf->multi->multi = sf->multi; 114795fce210SBarry Smith PetscFunctionReturn(0); 114895fce210SBarry Smith } 114995fce210SBarry Smith if (!sf->multi) { 115095fce210SBarry Smith const PetscInt *indegree; 11519837ea96SMatthew G. Knepley PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 115295fce210SBarry Smith PetscSFNode *remote; 115329046d53SLisandro Dalcin maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 11549566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 11559566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 11569566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 115795fce210SBarry Smith inoffset[0] = 0; 115895fce210SBarry Smith for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 11599837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) outones[i] = 1; 11609566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 11619566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 116295fce210SBarry Smith for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 116376bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 11649371c9d4SSatish Balay for (i = 0; i < sf->nroots; i++) { PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); } 116576bd3646SJed Brown } 11669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &remote)); 116795fce210SBarry Smith for (i = 0; i < sf->nleaves; i++) { 116895fce210SBarry Smith remote[i].rank = sf->remote[i].rank; 116938e7336fSToby Isaac remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 117095fce210SBarry Smith } 11719566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1172013b3241SStefano Zampini sf->multi->multi = sf->multi; 11739566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 117495fce210SBarry Smith if (sf->rankorder) { /* Sort the ranks */ 117595fce210SBarry Smith PetscMPIInt rank; 117695fce210SBarry Smith PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 117795fce210SBarry Smith PetscSFNode *newremote; 11789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 117995fce210SBarry Smith for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 11809566063dSJacob Faibussowitsch PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 11819837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) outranks[i] = rank; 11829566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 11839566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 118495fce210SBarry Smith /* Sort the incoming ranks at each vertex, build the inverse map */ 118595fce210SBarry Smith for (i = 0; i < sf->nroots; i++) { 118695fce210SBarry Smith PetscInt j; 118795fce210SBarry Smith for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 11889566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset)); 118995fce210SBarry Smith for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 119095fce210SBarry Smith } 11919566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 11929566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 11939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 119495fce210SBarry Smith for (i = 0; i < sf->nleaves; i++) { 119595fce210SBarry Smith newremote[i].rank = sf->remote[i].rank; 119601365b40SToby Isaac newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 119795fce210SBarry Smith } 11989566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 11999566063dSJacob Faibussowitsch PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 120095fce210SBarry Smith } 12019566063dSJacob Faibussowitsch PetscCall(PetscFree3(inoffset, outones, outoffset)); 120295fce210SBarry Smith } 120395fce210SBarry Smith *multi = sf->multi; 120495fce210SBarry Smith PetscFunctionReturn(0); 120595fce210SBarry Smith } 120695fce210SBarry Smith 120795fce210SBarry Smith /*@C 120872502a1fSJunchao Zhang PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 120995fce210SBarry Smith 121095fce210SBarry Smith Collective 121195fce210SBarry Smith 12124165533cSJose E. Roman Input Parameters: 121395fce210SBarry Smith + sf - original star forest 1214ba2a7774SJunchao Zhang . nselected - number of selected roots on this process 1215ba2a7774SJunchao Zhang - selected - indices of the selected roots on this process 121695fce210SBarry Smith 12174165533cSJose E. Roman Output Parameter: 1218cd620004SJunchao Zhang . esf - new star forest 121995fce210SBarry Smith 122095fce210SBarry Smith Level: advanced 122195fce210SBarry Smith 122295fce210SBarry Smith Note: 122395fce210SBarry Smith To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 122495fce210SBarry Smith be done by calling PetscSFGetGraph(). 122595fce210SBarry Smith 1226db781477SPatrick Sanan .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()` 122795fce210SBarry Smith @*/ 12289371c9d4SSatish Balay PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) { 1229cd620004SJunchao Zhang PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1230cd620004SJunchao Zhang const PetscInt *ilocal; 1231cd620004SJunchao Zhang signed char *rootdata, *leafdata, *leafmem; 1232ba2a7774SJunchao Zhang const PetscSFNode *iremote; 1233f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1234f659e5c7SJunchao Zhang MPI_Comm comm; 123595fce210SBarry Smith 123695fce210SBarry Smith PetscFunctionBegin; 123795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 123829046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 1239dadcf809SJacob Faibussowitsch if (nselected) PetscValidIntPointer(selected, 3); 1240cd620004SJunchao Zhang PetscValidPointer(esf, 4); 12410511a646SMatthew G. Knepley 12429566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 12439566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 12449566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 12459566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1246cd620004SJunchao Zhang 124776bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1248cd620004SJunchao Zhang PetscBool dups; 12499566063dSJacob Faibussowitsch PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 125028b400f6SJacob Faibussowitsch PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 12519371c9d4SSatish Balay for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1252cd620004SJunchao Zhang } 1253f659e5c7SJunchao Zhang 1254dbbe0bcdSBarry Smith if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1255dbbe0bcdSBarry Smith else { 1256cd620004SJunchao Zhang /* A generic version of creating embedded sf */ 12579566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1258cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 12599566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1260cd620004SJunchao Zhang leafdata = leafmem - minleaf; 1261cd620004SJunchao Zhang /* Tag selected roots and bcast to leaves */ 1262cd620004SJunchao Zhang for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 12639566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 12649566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1265ba2a7774SJunchao Zhang 1266cd620004SJunchao Zhang /* Build esf with leaves that are still connected */ 1267cd620004SJunchao Zhang esf_nleaves = 0; 1268cd620004SJunchao Zhang for (i = 0; i < nleaves; i++) { 1269cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1270cd620004SJunchao Zhang /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1271cd620004SJunchao Zhang with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1272cd620004SJunchao Zhang */ 1273cd620004SJunchao Zhang esf_nleaves += (leafdata[j] ? 1 : 0); 1274cd620004SJunchao Zhang } 12759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 12769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1277cd620004SJunchao Zhang for (i = n = 0; i < nleaves; i++) { 1278cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1279cd620004SJunchao Zhang if (leafdata[j]) { 1280cd620004SJunchao Zhang new_ilocal[n] = j; 1281cd620004SJunchao Zhang new_iremote[n].rank = iremote[i].rank; 1282cd620004SJunchao Zhang new_iremote[n].index = iremote[i].index; 1283fc1ede2bSMatthew G. Knepley ++n; 128495fce210SBarry Smith } 128595fce210SBarry Smith } 12869566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, esf)); 12879566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*esf)); 12889566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 12899566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootdata, leafmem)); 1290f659e5c7SJunchao Zhang } 12919566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 129295fce210SBarry Smith PetscFunctionReturn(0); 129395fce210SBarry Smith } 129495fce210SBarry Smith 12952f5fb4c2SMatthew G. Knepley /*@C 12962f5fb4c2SMatthew G. Knepley PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 12972f5fb4c2SMatthew G. Knepley 12982f5fb4c2SMatthew G. Knepley Collective 12992f5fb4c2SMatthew G. Knepley 13004165533cSJose E. Roman Input Parameters: 13012f5fb4c2SMatthew G. Knepley + sf - original star forest 1302f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process 1303f659e5c7SJunchao Zhang - selected - indices of the selected leaves on this process 13042f5fb4c2SMatthew G. Knepley 13054165533cSJose E. Roman Output Parameter: 13062f5fb4c2SMatthew G. Knepley . newsf - new star forest 13072f5fb4c2SMatthew G. Knepley 13082f5fb4c2SMatthew G. Knepley Level: advanced 13092f5fb4c2SMatthew G. Knepley 1310db781477SPatrick Sanan .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 13112f5fb4c2SMatthew G. Knepley @*/ 13129371c9d4SSatish Balay PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) { 1313f659e5c7SJunchao Zhang const PetscSFNode *iremote; 1314f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1315f659e5c7SJunchao Zhang const PetscInt *ilocal; 1316f659e5c7SJunchao Zhang PetscInt i, nroots, *leaves, *new_ilocal; 1317f659e5c7SJunchao Zhang MPI_Comm comm; 13182f5fb4c2SMatthew G. Knepley 13192f5fb4c2SMatthew G. Knepley PetscFunctionBegin; 13202f5fb4c2SMatthew G. Knepley PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 132129046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1); 1322dadcf809SJacob Faibussowitsch if (nselected) PetscValidIntPointer(selected, 3); 13232f5fb4c2SMatthew G. Knepley PetscValidPointer(newsf, 4); 13242f5fb4c2SMatthew G. Knepley 1325f659e5c7SJunchao Zhang /* Uniq selected[] and put results in leaves[] */ 13269566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 13279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &leaves)); 13289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(leaves, selected, nselected)); 13299566063dSJacob Faibussowitsch PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 133008401ef6SPierre Jolivet PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1331f659e5c7SJunchao Zhang 1332f659e5c7SJunchao Zhang /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1333dbbe0bcdSBarry Smith if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1334dbbe0bcdSBarry Smith else { 13359566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 13369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &new_ilocal)); 13379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &new_iremote)); 1338f659e5c7SJunchao Zhang for (i = 0; i < nselected; ++i) { 1339f659e5c7SJunchao Zhang const PetscInt l = leaves[i]; 1340f659e5c7SJunchao Zhang new_ilocal[i] = ilocal ? ilocal[l] : l; 1341f659e5c7SJunchao Zhang new_iremote[i].rank = iremote[l].rank; 1342f659e5c7SJunchao Zhang new_iremote[i].index = iremote[l].index; 13432f5fb4c2SMatthew G. Knepley } 13449566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 13459566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1346f659e5c7SJunchao Zhang } 13479566063dSJacob Faibussowitsch PetscCall(PetscFree(leaves)); 13482f5fb4c2SMatthew G. Knepley PetscFunctionReturn(0); 13492f5fb4c2SMatthew G. Knepley } 13502f5fb4c2SMatthew G. Knepley 135195fce210SBarry Smith /*@C 1352ad227feaSJunchao Zhang PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 13533482bfa8SJunchao Zhang 13543482bfa8SJunchao Zhang Collective on PetscSF 13553482bfa8SJunchao Zhang 13564165533cSJose E. Roman Input Parameters: 13573482bfa8SJunchao Zhang + sf - star forest on which to communicate 13583482bfa8SJunchao Zhang . unit - data type associated with each node 13593482bfa8SJunchao Zhang . rootdata - buffer to broadcast 13603482bfa8SJunchao Zhang - op - operation to use for reduction 13613482bfa8SJunchao Zhang 13624165533cSJose E. Roman Output Parameter: 13633482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 13643482bfa8SJunchao Zhang 13653482bfa8SJunchao Zhang Level: intermediate 13663482bfa8SJunchao Zhang 1367d0295fc0SJunchao Zhang Notes: 1368d0295fc0SJunchao Zhang When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1369d0295fc0SJunchao Zhang are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1370ad227feaSJunchao Zhang use PetscSFBcastWithMemTypeBegin() instead. 1371db781477SPatrick Sanan .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 13723482bfa8SJunchao Zhang @*/ 13739371c9d4SSatish Balay PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) { 1374eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 13753482bfa8SJunchao Zhang 13763482bfa8SJunchao Zhang PetscFunctionBegin; 13773482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 13789566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 13799566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 13809566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 13819566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1382dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 13839566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 13843482bfa8SJunchao Zhang PetscFunctionReturn(0); 13853482bfa8SJunchao Zhang } 13863482bfa8SJunchao Zhang 13873482bfa8SJunchao Zhang /*@C 1388ad227feaSJunchao Zhang PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1389d0295fc0SJunchao Zhang 1390d0295fc0SJunchao Zhang Collective on PetscSF 1391d0295fc0SJunchao Zhang 13924165533cSJose E. Roman Input Parameters: 1393d0295fc0SJunchao Zhang + sf - star forest on which to communicate 1394d0295fc0SJunchao Zhang . unit - data type associated with each node 1395d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1396d0295fc0SJunchao Zhang . rootdata - buffer to broadcast 1397d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1398d0295fc0SJunchao Zhang - op - operation to use for reduction 1399d0295fc0SJunchao Zhang 14004165533cSJose E. Roman Output Parameter: 1401d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 1402d0295fc0SJunchao Zhang 1403d0295fc0SJunchao Zhang Level: intermediate 1404d0295fc0SJunchao Zhang 1405db781477SPatrick Sanan .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1406d0295fc0SJunchao Zhang @*/ 14079371c9d4SSatish Balay PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) { 1408d0295fc0SJunchao Zhang PetscFunctionBegin; 1409d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 14109566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 14119566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1412dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 14139566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1414d0295fc0SJunchao Zhang PetscFunctionReturn(0); 1415d0295fc0SJunchao Zhang } 1416d0295fc0SJunchao Zhang 1417d0295fc0SJunchao Zhang /*@C 1418ad227feaSJunchao Zhang PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 14193482bfa8SJunchao Zhang 14203482bfa8SJunchao Zhang Collective 14213482bfa8SJunchao Zhang 14224165533cSJose E. Roman Input Parameters: 14233482bfa8SJunchao Zhang + sf - star forest 14243482bfa8SJunchao Zhang . unit - data type 14253482bfa8SJunchao Zhang . rootdata - buffer to broadcast 14263482bfa8SJunchao Zhang - op - operation to use for reduction 14273482bfa8SJunchao Zhang 14284165533cSJose E. Roman Output Parameter: 14293482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 14303482bfa8SJunchao Zhang 14313482bfa8SJunchao Zhang Level: intermediate 14323482bfa8SJunchao Zhang 1433db781477SPatrick Sanan .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()` 14343482bfa8SJunchao Zhang @*/ 14359371c9d4SSatish Balay PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) { 14363482bfa8SJunchao Zhang PetscFunctionBegin; 14373482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 14389566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1439dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 14409566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 14413482bfa8SJunchao Zhang PetscFunctionReturn(0); 14423482bfa8SJunchao Zhang } 14433482bfa8SJunchao Zhang 14443482bfa8SJunchao Zhang /*@C 144595fce210SBarry Smith PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 144695fce210SBarry Smith 144795fce210SBarry Smith Collective 144895fce210SBarry Smith 14494165533cSJose E. Roman Input Parameters: 145095fce210SBarry Smith + sf - star forest 145195fce210SBarry Smith . unit - data type 145295fce210SBarry Smith . leafdata - values to reduce 145395fce210SBarry Smith - op - reduction operation 145495fce210SBarry Smith 14554165533cSJose E. Roman Output Parameter: 145695fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 145795fce210SBarry Smith 145895fce210SBarry Smith Level: intermediate 145995fce210SBarry Smith 1460d0295fc0SJunchao Zhang Notes: 1461d0295fc0SJunchao Zhang When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1462d0295fc0SJunchao Zhang are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1463d0295fc0SJunchao Zhang use PetscSFReduceWithMemTypeBegin() instead. 1464d0295fc0SJunchao Zhang 1465db781477SPatrick Sanan .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()` 146695fce210SBarry Smith @*/ 14679371c9d4SSatish Balay PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) { 1468eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 146995fce210SBarry Smith 147095fce210SBarry Smith PetscFunctionBegin; 147195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 14729566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 14739566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 14749566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 14759566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 14769566063dSJacob Faibussowitsch PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 14779566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 147895fce210SBarry Smith PetscFunctionReturn(0); 147995fce210SBarry Smith } 148095fce210SBarry Smith 148195fce210SBarry Smith /*@C 1482d0295fc0SJunchao Zhang PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1483d0295fc0SJunchao Zhang 1484d0295fc0SJunchao Zhang Collective 1485d0295fc0SJunchao Zhang 14864165533cSJose E. Roman Input Parameters: 1487d0295fc0SJunchao Zhang + sf - star forest 1488d0295fc0SJunchao Zhang . unit - data type 1489d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1490d0295fc0SJunchao Zhang . leafdata - values to reduce 1491d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1492d0295fc0SJunchao Zhang - op - reduction operation 1493d0295fc0SJunchao Zhang 14944165533cSJose E. Roman Output Parameter: 1495d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root 1496d0295fc0SJunchao Zhang 1497d0295fc0SJunchao Zhang Level: intermediate 1498d0295fc0SJunchao Zhang 1499db781477SPatrick Sanan .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()` 1500d0295fc0SJunchao Zhang @*/ 15019371c9d4SSatish Balay PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) { 1502d0295fc0SJunchao Zhang PetscFunctionBegin; 1503d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15049566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 15059566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 15069566063dSJacob Faibussowitsch PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 15079566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1508d0295fc0SJunchao Zhang PetscFunctionReturn(0); 1509d0295fc0SJunchao Zhang } 1510d0295fc0SJunchao Zhang 1511d0295fc0SJunchao Zhang /*@C 151295fce210SBarry Smith PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 151395fce210SBarry Smith 151495fce210SBarry Smith Collective 151595fce210SBarry Smith 15164165533cSJose E. Roman Input Parameters: 151795fce210SBarry Smith + sf - star forest 151895fce210SBarry Smith . unit - data type 151995fce210SBarry Smith . leafdata - values to reduce 152095fce210SBarry Smith - op - reduction operation 152195fce210SBarry Smith 15224165533cSJose E. Roman Output Parameter: 152395fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 152495fce210SBarry Smith 152595fce210SBarry Smith Level: intermediate 152695fce210SBarry Smith 1527db781477SPatrick Sanan .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()` 152895fce210SBarry Smith @*/ 15299371c9d4SSatish Balay PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) { 153095fce210SBarry Smith PetscFunctionBegin; 153195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15329566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1533dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 15349566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 153595fce210SBarry Smith PetscFunctionReturn(0); 153695fce210SBarry Smith } 153795fce210SBarry Smith 153895fce210SBarry Smith /*@C 1539a1729e3fSJunchao Zhang PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1540a1729e3fSJunchao Zhang 1541a1729e3fSJunchao Zhang Collective 1542a1729e3fSJunchao Zhang 15434165533cSJose E. Roman Input Parameters: 1544a1729e3fSJunchao Zhang + sf - star forest 1545a1729e3fSJunchao Zhang . unit - data type 1546a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1547a1729e3fSJunchao Zhang - op - operation to use for reduction 1548a1729e3fSJunchao Zhang 15494165533cSJose E. Roman Output Parameters: 1550a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1551a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1552a1729e3fSJunchao Zhang 1553a1729e3fSJunchao Zhang Level: advanced 1554a1729e3fSJunchao Zhang 1555a1729e3fSJunchao Zhang Note: 1556a1729e3fSJunchao Zhang The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1557a1729e3fSJunchao Zhang might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1558a1729e3fSJunchao Zhang not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1559a1729e3fSJunchao Zhang integers. 1560a1729e3fSJunchao Zhang 1561db781477SPatrick Sanan .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1562a1729e3fSJunchao Zhang @*/ 15639371c9d4SSatish Balay PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) { 1564eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype, leafupdatemtype; 1565a1729e3fSJunchao Zhang 1566a1729e3fSJunchao Zhang PetscFunctionBegin; 1567a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 15689566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 15699566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 15709566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 15719566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 15729566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 157308401ef6SPierre Jolivet PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1574dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 15759566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1576a1729e3fSJunchao Zhang PetscFunctionReturn(0); 1577a1729e3fSJunchao Zhang } 1578a1729e3fSJunchao Zhang 1579a1729e3fSJunchao Zhang /*@C 1580d3b3e55cSJunchao Zhang PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1581d3b3e55cSJunchao Zhang 1582d3b3e55cSJunchao Zhang Collective 1583d3b3e55cSJunchao Zhang 1584d3b3e55cSJunchao Zhang Input Parameters: 1585d3b3e55cSJunchao Zhang + sf - star forest 1586d3b3e55cSJunchao Zhang . unit - data type 1587d3b3e55cSJunchao Zhang . rootmtype - memory type of rootdata 1588d3b3e55cSJunchao Zhang . leafmtype - memory type of leafdata 1589d3b3e55cSJunchao Zhang . leafdata - leaf values to use in reduction 1590d3b3e55cSJunchao Zhang . leafupdatemtype - memory type of leafupdate 1591d3b3e55cSJunchao Zhang - op - operation to use for reduction 1592d3b3e55cSJunchao Zhang 1593d3b3e55cSJunchao Zhang Output Parameters: 1594d3b3e55cSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1595d3b3e55cSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1596d3b3e55cSJunchao Zhang 1597d3b3e55cSJunchao Zhang Level: advanced 1598d3b3e55cSJunchao Zhang 1599d3b3e55cSJunchao Zhang Note: See PetscSFFetchAndOpBegin() for more details. 1600d3b3e55cSJunchao Zhang 1601c2e3fba1SPatrick Sanan .seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1602d3b3e55cSJunchao Zhang @*/ 16039371c9d4SSatish Balay PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) { 1604d3b3e55cSJunchao Zhang PetscFunctionBegin; 1605d3b3e55cSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16069566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 16079566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 160808401ef6SPierre Jolivet PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1609dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 16109566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1611d3b3e55cSJunchao Zhang PetscFunctionReturn(0); 1612d3b3e55cSJunchao Zhang } 1613d3b3e55cSJunchao Zhang 1614d3b3e55cSJunchao Zhang /*@C 1615a1729e3fSJunchao Zhang PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1616a1729e3fSJunchao Zhang 1617a1729e3fSJunchao Zhang Collective 1618a1729e3fSJunchao Zhang 16194165533cSJose E. Roman Input Parameters: 1620a1729e3fSJunchao Zhang + sf - star forest 1621a1729e3fSJunchao Zhang . unit - data type 1622a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1623a1729e3fSJunchao Zhang - op - operation to use for reduction 1624a1729e3fSJunchao Zhang 16254165533cSJose E. Roman Output Parameters: 1626a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1627a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1628a1729e3fSJunchao Zhang 1629a1729e3fSJunchao Zhang Level: advanced 1630a1729e3fSJunchao Zhang 1631db781477SPatrick Sanan .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()` 1632a1729e3fSJunchao Zhang @*/ 16339371c9d4SSatish Balay PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) { 1634a1729e3fSJunchao Zhang PetscFunctionBegin; 1635a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 16369566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1637dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 16389566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1639a1729e3fSJunchao Zhang PetscFunctionReturn(0); 1640a1729e3fSJunchao Zhang } 1641a1729e3fSJunchao Zhang 1642a1729e3fSJunchao Zhang /*@C 164395fce210SBarry Smith PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 164495fce210SBarry Smith 164595fce210SBarry Smith Collective 164695fce210SBarry Smith 16474165533cSJose E. Roman Input Parameter: 164895fce210SBarry Smith . sf - star forest 164995fce210SBarry Smith 16504165533cSJose E. Roman Output Parameter: 165195fce210SBarry Smith . degree - degree of each root vertex 165295fce210SBarry Smith 165395fce210SBarry Smith Level: advanced 165495fce210SBarry Smith 1655ffe67aa5SVáclav Hapla Notes: 1656ffe67aa5SVáclav Hapla The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1657ffe67aa5SVáclav Hapla 1658db781477SPatrick Sanan .seealso: `PetscSFGatherBegin()` 165995fce210SBarry Smith @*/ 16609371c9d4SSatish Balay PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) { 166195fce210SBarry Smith PetscFunctionBegin; 166295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 166395fce210SBarry Smith PetscSFCheckGraphSet(sf, 1); 166495fce210SBarry Smith PetscValidPointer(degree, 2); 1665803bd9e8SMatthew G. Knepley if (!sf->degreeknown) { 16665b0d146aSStefano Zampini PetscInt i, nroots = sf->nroots, maxlocal; 166728b400f6SJacob Faibussowitsch PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 16685b0d146aSStefano Zampini maxlocal = sf->maxleaf - sf->minleaf + 1; 16699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nroots, &sf->degree)); 16709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 167129046d53SLisandro Dalcin for (i = 0; i < nroots; i++) sf->degree[i] = 0; 16729837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 16739566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 167495fce210SBarry Smith } 167595fce210SBarry Smith *degree = NULL; 167695fce210SBarry Smith PetscFunctionReturn(0); 167795fce210SBarry Smith } 167895fce210SBarry Smith 167995fce210SBarry Smith /*@C 168095fce210SBarry Smith PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 168195fce210SBarry Smith 168295fce210SBarry Smith Collective 168395fce210SBarry Smith 16844165533cSJose E. Roman Input Parameter: 168595fce210SBarry Smith . sf - star forest 168695fce210SBarry Smith 16874165533cSJose E. Roman Output Parameter: 168895fce210SBarry Smith . degree - degree of each root vertex 168995fce210SBarry Smith 169095fce210SBarry Smith Level: developer 169195fce210SBarry Smith 1692ffe67aa5SVáclav Hapla Notes: 1693ffe67aa5SVáclav Hapla The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1694ffe67aa5SVáclav Hapla 169595fce210SBarry Smith .seealso: 169695fce210SBarry Smith @*/ 16979371c9d4SSatish Balay PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) { 169895fce210SBarry Smith PetscFunctionBegin; 169995fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 170095fce210SBarry Smith PetscSFCheckGraphSet(sf, 1); 170129046d53SLisandro Dalcin PetscValidPointer(degree, 2); 170295fce210SBarry Smith if (!sf->degreeknown) { 170328b400f6SJacob Faibussowitsch PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 17049566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 17059566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->degreetmp)); 170695fce210SBarry Smith sf->degreeknown = PETSC_TRUE; 170795fce210SBarry Smith } 170895fce210SBarry Smith *degree = sf->degree; 170995fce210SBarry Smith PetscFunctionReturn(0); 171095fce210SBarry Smith } 171195fce210SBarry Smith 1712673100f5SVaclav Hapla /*@C 171366dfcd1aSVaclav Hapla PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 171466dfcd1aSVaclav Hapla Each multi-root is assigned index of the corresponding original root. 1715673100f5SVaclav Hapla 1716673100f5SVaclav Hapla Collective 1717673100f5SVaclav Hapla 17184165533cSJose E. Roman Input Parameters: 1719673100f5SVaclav Hapla + sf - star forest 1720673100f5SVaclav Hapla - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1721673100f5SVaclav Hapla 17224165533cSJose E. Roman Output Parameters: 172366dfcd1aSVaclav Hapla + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 172466dfcd1aSVaclav Hapla - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1725673100f5SVaclav Hapla 1726673100f5SVaclav Hapla Level: developer 1727673100f5SVaclav Hapla 1728ffe67aa5SVáclav Hapla Notes: 1729ffe67aa5SVáclav Hapla The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1730ffe67aa5SVáclav Hapla 1731db781477SPatrick Sanan .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1732673100f5SVaclav Hapla @*/ 17339371c9d4SSatish Balay PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) { 1734673100f5SVaclav Hapla PetscSF msf; 1735673100f5SVaclav Hapla PetscInt i, j, k, nroots, nmroots; 1736673100f5SVaclav Hapla 1737673100f5SVaclav Hapla PetscFunctionBegin; 1738673100f5SVaclav Hapla PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 17399566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 174029328920SVaclav Hapla if (nroots) PetscValidIntPointer(degree, 2); 174166dfcd1aSVaclav Hapla if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3); 174266dfcd1aSVaclav Hapla PetscValidPointer(multiRootsOrigNumbering, 4); 17439566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &msf)); 17449566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 17459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1746673100f5SVaclav Hapla for (i = 0, j = 0, k = 0; i < nroots; i++) { 1747673100f5SVaclav Hapla if (!degree[i]) continue; 17489371c9d4SSatish Balay for (j = 0; j < degree[i]; j++, k++) { (*multiRootsOrigNumbering)[k] = i; } 1749673100f5SVaclav Hapla } 175008401ef6SPierre Jolivet PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 175166dfcd1aSVaclav Hapla if (nMultiRoots) *nMultiRoots = nmroots; 1752673100f5SVaclav Hapla PetscFunctionReturn(0); 1753673100f5SVaclav Hapla } 1754673100f5SVaclav Hapla 175595fce210SBarry Smith /*@C 175695fce210SBarry Smith PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 175795fce210SBarry Smith 175895fce210SBarry Smith Collective 175995fce210SBarry Smith 17604165533cSJose E. Roman Input Parameters: 176195fce210SBarry Smith + sf - star forest 176295fce210SBarry Smith . unit - data type 176395fce210SBarry Smith - leafdata - leaf data to gather to roots 176495fce210SBarry Smith 17654165533cSJose E. Roman Output Parameter: 176695fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 176795fce210SBarry Smith 176895fce210SBarry Smith Level: intermediate 176995fce210SBarry Smith 1770db781477SPatrick Sanan .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 177195fce210SBarry Smith @*/ 17729371c9d4SSatish Balay PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) { 1773a5526d50SJunchao Zhang PetscSF multi = NULL; 177495fce210SBarry Smith 177595fce210SBarry Smith PetscFunctionBegin; 177695fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 17779566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 17789566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 17799566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 178095fce210SBarry Smith PetscFunctionReturn(0); 178195fce210SBarry Smith } 178295fce210SBarry Smith 178395fce210SBarry Smith /*@C 178495fce210SBarry Smith PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 178595fce210SBarry Smith 178695fce210SBarry Smith Collective 178795fce210SBarry Smith 17884165533cSJose E. Roman Input Parameters: 178995fce210SBarry Smith + sf - star forest 179095fce210SBarry Smith . unit - data type 179195fce210SBarry Smith - leafdata - leaf data to gather to roots 179295fce210SBarry Smith 17934165533cSJose E. Roman Output Parameter: 179495fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 179595fce210SBarry Smith 179695fce210SBarry Smith Level: intermediate 179795fce210SBarry Smith 1798db781477SPatrick Sanan .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 179995fce210SBarry Smith @*/ 18009371c9d4SSatish Balay PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) { 1801a5526d50SJunchao Zhang PetscSF multi = NULL; 180295fce210SBarry Smith 180395fce210SBarry Smith PetscFunctionBegin; 180495fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 18059566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 18069566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 180795fce210SBarry Smith PetscFunctionReturn(0); 180895fce210SBarry Smith } 180995fce210SBarry Smith 181095fce210SBarry Smith /*@C 181195fce210SBarry Smith PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 181295fce210SBarry Smith 181395fce210SBarry Smith Collective 181495fce210SBarry Smith 18154165533cSJose E. Roman Input Parameters: 181695fce210SBarry Smith + sf - star forest 181795fce210SBarry Smith . unit - data type 181895fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 181995fce210SBarry Smith 18204165533cSJose E. Roman Output Parameter: 182195fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 182295fce210SBarry Smith 182395fce210SBarry Smith Level: intermediate 182495fce210SBarry Smith 1825db781477SPatrick Sanan .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 182695fce210SBarry Smith @*/ 18279371c9d4SSatish Balay PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) { 1828a5526d50SJunchao Zhang PetscSF multi = NULL; 182995fce210SBarry Smith 183095fce210SBarry Smith PetscFunctionBegin; 183195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 18329566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 18339566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 18349566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 183595fce210SBarry Smith PetscFunctionReturn(0); 183695fce210SBarry Smith } 183795fce210SBarry Smith 183895fce210SBarry Smith /*@C 183995fce210SBarry Smith PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 184095fce210SBarry Smith 184195fce210SBarry Smith Collective 184295fce210SBarry Smith 18434165533cSJose E. Roman Input Parameters: 184495fce210SBarry Smith + sf - star forest 184595fce210SBarry Smith . unit - data type 184695fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 184795fce210SBarry Smith 18484165533cSJose E. Roman Output Parameter: 184995fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 185095fce210SBarry Smith 185195fce210SBarry Smith Level: intermediate 185295fce210SBarry Smith 1853db781477SPatrick Sanan .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 185495fce210SBarry Smith @*/ 18559371c9d4SSatish Balay PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) { 1856a5526d50SJunchao Zhang PetscSF multi = NULL; 185795fce210SBarry Smith 185895fce210SBarry Smith PetscFunctionBegin; 185995fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 18609566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi)); 18619566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 186295fce210SBarry Smith PetscFunctionReturn(0); 186395fce210SBarry Smith } 1864a7b3aa13SAta Mesgarnejad 18659371c9d4SSatish Balay static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) { 1866a072220fSLawrence Mitchell PetscInt i, n, nleaves; 1867a072220fSLawrence Mitchell const PetscInt *ilocal = NULL; 1868a072220fSLawrence Mitchell PetscHSetI seen; 1869a072220fSLawrence Mitchell 1870a072220fSLawrence Mitchell PetscFunctionBegin; 1871b458e8f1SJose E. Roman if (PetscDefined(USE_DEBUG)) { 18729566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 18739566063dSJacob Faibussowitsch PetscCall(PetscHSetICreate(&seen)); 1874a072220fSLawrence Mitchell for (i = 0; i < nleaves; i++) { 1875a072220fSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 18769566063dSJacob Faibussowitsch PetscCall(PetscHSetIAdd(seen, leaf)); 1877a072220fSLawrence Mitchell } 18789566063dSJacob Faibussowitsch PetscCall(PetscHSetIGetSize(seen, &n)); 187908401ef6SPierre Jolivet PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 18809566063dSJacob Faibussowitsch PetscCall(PetscHSetIDestroy(&seen)); 1881b458e8f1SJose E. Roman } 1882a072220fSLawrence Mitchell PetscFunctionReturn(0); 1883a072220fSLawrence Mitchell } 188454729392SStefano Zampini 1885a7b3aa13SAta Mesgarnejad /*@ 188604c0ada0SJunchao Zhang PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1887a7b3aa13SAta Mesgarnejad 1888a7b3aa13SAta Mesgarnejad Input Parameters: 188954729392SStefano Zampini + sfA - The first PetscSF 189054729392SStefano Zampini - sfB - The second PetscSF 1891a7b3aa13SAta Mesgarnejad 1892a7b3aa13SAta Mesgarnejad Output Parameters: 189354729392SStefano Zampini . sfBA - The composite SF 1894a7b3aa13SAta Mesgarnejad 1895a7b3aa13SAta Mesgarnejad Level: developer 1896a7b3aa13SAta Mesgarnejad 1897a072220fSLawrence Mitchell Notes: 189854729392SStefano Zampini Currently, the two SFs must be defined on congruent communicators and they must be true star 189954729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. 190054729392SStefano Zampini 190154729392SStefano Zampini sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 190254729392SStefano Zampini a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 190354729392SStefano Zampini nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 190454729392SStefano Zampini Bcast on sfA, then a Bcast on sfB, on connected nodes. 1905a072220fSLawrence Mitchell 1906db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 1907a7b3aa13SAta Mesgarnejad @*/ 19089371c9d4SSatish Balay PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) { 1909a7b3aa13SAta Mesgarnejad const PetscSFNode *remotePointsA, *remotePointsB; 1910d41018fbSJunchao Zhang PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 191154729392SStefano Zampini const PetscInt *localPointsA, *localPointsB; 191254729392SStefano Zampini PetscInt *localPointsBA; 191354729392SStefano Zampini PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 191454729392SStefano Zampini PetscBool denseB; 1915a7b3aa13SAta Mesgarnejad 1916a7b3aa13SAta Mesgarnejad PetscFunctionBegin; 1917a7b3aa13SAta Mesgarnejad PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 191829046d53SLisandro Dalcin PetscSFCheckGraphSet(sfA, 1); 191929046d53SLisandro Dalcin PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 192029046d53SLisandro Dalcin PetscSFCheckGraphSet(sfB, 2); 192154729392SStefano Zampini PetscCheckSameComm(sfA, 1, sfB, 2); 192229046d53SLisandro Dalcin PetscValidPointer(sfBA, 3); 19239566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 19249566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 192554729392SStefano Zampini 19269566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 19279566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 1928d41018fbSJunchao Zhang if (localPointsA) { 19299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 193054729392SStefano Zampini for (i = 0; i < numRootsB; i++) { 193154729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 193254729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 193354729392SStefano Zampini } 193454729392SStefano Zampini for (i = 0; i < numLeavesA; i++) { 193554729392SStefano Zampini if (localPointsA[i] >= numRootsB) continue; 193654729392SStefano Zampini reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 193754729392SStefano Zampini } 1938d41018fbSJunchao Zhang remotePointsA = reorderedRemotePointsA; 1939d41018fbSJunchao Zhang } 19409566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 19419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 19429566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE)); 19439566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE)); 19449566063dSJacob Faibussowitsch PetscCall(PetscFree(reorderedRemotePointsA)); 1945d41018fbSJunchao Zhang 194654729392SStefano Zampini denseB = (PetscBool)!localPointsB; 194754729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 194854729392SStefano Zampini if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 194954729392SStefano Zampini else numLeavesBA++; 195054729392SStefano Zampini } 195154729392SStefano Zampini if (denseB) { 1952d41018fbSJunchao Zhang localPointsBA = NULL; 1953d41018fbSJunchao Zhang remotePointsBA = leafdataB; 1954d41018fbSJunchao Zhang } else { 19559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 19569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 195754729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 195854729392SStefano Zampini const PetscInt l = localPointsB ? localPointsB[i] : i; 195954729392SStefano Zampini 196054729392SStefano Zampini if (leafdataB[l - minleaf].rank == -1) continue; 196154729392SStefano Zampini remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 196254729392SStefano Zampini localPointsBA[numLeavesBA] = l; 196354729392SStefano Zampini numLeavesBA++; 196454729392SStefano Zampini } 19659566063dSJacob Faibussowitsch PetscCall(PetscFree(leafdataB)); 1966d41018fbSJunchao Zhang } 19679566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 19689566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*sfBA)); 19699566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 1970a7b3aa13SAta Mesgarnejad PetscFunctionReturn(0); 1971a7b3aa13SAta Mesgarnejad } 19721c6ba672SJunchao Zhang 197304c0ada0SJunchao Zhang /*@ 197454729392SStefano Zampini PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 197504c0ada0SJunchao Zhang 197604c0ada0SJunchao Zhang Input Parameters: 197754729392SStefano Zampini + sfA - The first PetscSF 197854729392SStefano Zampini - sfB - The second PetscSF 197904c0ada0SJunchao Zhang 198004c0ada0SJunchao Zhang Output Parameters: 198154729392SStefano Zampini . sfBA - The composite SF. 198204c0ada0SJunchao Zhang 198304c0ada0SJunchao Zhang Level: developer 198404c0ada0SJunchao Zhang 198554729392SStefano Zampini Notes: 198654729392SStefano Zampini Currently, the two SFs must be defined on congruent communicators and they must be true star 198754729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 198854729392SStefano Zampini second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 198954729392SStefano Zampini 199054729392SStefano Zampini sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 199154729392SStefano Zampini a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 199254729392SStefano Zampini roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 199354729392SStefano Zampini a Reduce on sfB, on connected roots. 199454729392SStefano Zampini 1995db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 199604c0ada0SJunchao Zhang @*/ 19979371c9d4SSatish Balay PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) { 199804c0ada0SJunchao Zhang const PetscSFNode *remotePointsA, *remotePointsB; 199904c0ada0SJunchao Zhang PetscSFNode *remotePointsBA; 200004c0ada0SJunchao Zhang const PetscInt *localPointsA, *localPointsB; 200154729392SStefano Zampini PetscSFNode *reorderedRemotePointsA = NULL; 200254729392SStefano Zampini PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 20035b0d146aSStefano Zampini MPI_Op op; 20045b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 20055b0d146aSStefano Zampini PetscBool iswin; 20065b0d146aSStefano Zampini #endif 200704c0ada0SJunchao Zhang 200804c0ada0SJunchao Zhang PetscFunctionBegin; 200904c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 201004c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfA, 1); 201104c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 201204c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfB, 2); 201354729392SStefano Zampini PetscCheckSameComm(sfA, 1, sfB, 2); 201404c0ada0SJunchao Zhang PetscValidPointer(sfBA, 3); 20159566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 20169566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 201754729392SStefano Zampini 20189566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 20199566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 20205b0d146aSStefano Zampini 20215b0d146aSStefano Zampini /* TODO: Check roots of sfB have degree of 1 */ 20225b0d146aSStefano Zampini /* Once we implement it, we can replace the MPI_MAXLOC 202383df288dSJunchao Zhang with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 20245b0d146aSStefano Zampini We use MPI_MAXLOC only to have a deterministic output from this routine if 20255b0d146aSStefano Zampini the root condition is not meet. 20265b0d146aSStefano Zampini */ 20275b0d146aSStefano Zampini op = MPI_MAXLOC; 20285b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 20295b0d146aSStefano Zampini /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 20309566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 203183df288dSJunchao Zhang if (iswin) op = MPI_REPLACE; 20325b0d146aSStefano Zampini #endif 20335b0d146aSStefano Zampini 20349566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 20359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 203654729392SStefano Zampini for (i = 0; i < maxleaf - minleaf + 1; i++) { 203754729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 203854729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 203954729392SStefano Zampini } 204054729392SStefano Zampini if (localPointsA) { 204154729392SStefano Zampini for (i = 0; i < numLeavesA; i++) { 204254729392SStefano Zampini if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 204354729392SStefano Zampini reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 204454729392SStefano Zampini } 204554729392SStefano Zampini } else { 204654729392SStefano Zampini for (i = 0; i < numLeavesA; i++) { 204754729392SStefano Zampini if (i > maxleaf || i < minleaf) continue; 204854729392SStefano Zampini reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 204954729392SStefano Zampini } 205054729392SStefano Zampini } 205154729392SStefano Zampini 20529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 20539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 205454729392SStefano Zampini for (i = 0; i < numRootsB; i++) { 205554729392SStefano Zampini remotePointsBA[i].rank = -1; 205654729392SStefano Zampini remotePointsBA[i].index = -1; 205754729392SStefano Zampini } 205854729392SStefano Zampini 20599566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op)); 20609566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op)); 20619566063dSJacob Faibussowitsch PetscCall(PetscFree(reorderedRemotePointsA)); 206254729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 206354729392SStefano Zampini if (remotePointsBA[i].rank == -1) continue; 206454729392SStefano Zampini remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 206554729392SStefano Zampini remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 206654729392SStefano Zampini localPointsBA[numLeavesBA] = i; 206754729392SStefano Zampini numLeavesBA++; 206854729392SStefano Zampini } 20699566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 20709566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*sfBA)); 20719566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 207204c0ada0SJunchao Zhang PetscFunctionReturn(0); 207304c0ada0SJunchao Zhang } 207404c0ada0SJunchao Zhang 20751c6ba672SJunchao Zhang /* 20761c6ba672SJunchao Zhang PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 20771c6ba672SJunchao Zhang 20781c6ba672SJunchao Zhang Input Parameters: 20791c6ba672SJunchao Zhang . sf - The global PetscSF 20801c6ba672SJunchao Zhang 20811c6ba672SJunchao Zhang Output Parameters: 20821c6ba672SJunchao Zhang . out - The local PetscSF 20831c6ba672SJunchao Zhang */ 20849371c9d4SSatish Balay PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) { 20851c6ba672SJunchao Zhang MPI_Comm comm; 20861c6ba672SJunchao Zhang PetscMPIInt myrank; 20871c6ba672SJunchao Zhang const PetscInt *ilocal; 20881c6ba672SJunchao Zhang const PetscSFNode *iremote; 20891c6ba672SJunchao Zhang PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 20901c6ba672SJunchao Zhang PetscSFNode *liremote; 20911c6ba672SJunchao Zhang PetscSF lsf; 20921c6ba672SJunchao Zhang 20931c6ba672SJunchao Zhang PetscFunctionBegin; 20941c6ba672SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2095dbbe0bcdSBarry Smith if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2096dbbe0bcdSBarry Smith else { 20971c6ba672SJunchao Zhang /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 20989566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 20999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 21001c6ba672SJunchao Zhang 21011c6ba672SJunchao Zhang /* Find out local edges and build a local SF */ 21029566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 21039371c9d4SSatish Balay for (i = lnleaves = 0; i < nleaves; i++) { 21049371c9d4SSatish Balay if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 21059371c9d4SSatish Balay } 21069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lnleaves, &lilocal)); 21079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lnleaves, &liremote)); 21081c6ba672SJunchao Zhang 21091c6ba672SJunchao Zhang for (i = j = 0; i < nleaves; i++) { 21101c6ba672SJunchao Zhang if (iremote[i].rank == (PetscInt)myrank) { 21111c6ba672SJunchao Zhang lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 21121c6ba672SJunchao Zhang liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 21131c6ba672SJunchao Zhang liremote[j].index = iremote[i].index; 21141c6ba672SJunchao Zhang j++; 21151c6ba672SJunchao Zhang } 21161c6ba672SJunchao Zhang } 21179566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 21189566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(lsf)); 21199566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 21209566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(lsf)); 21211c6ba672SJunchao Zhang *out = lsf; 21221c6ba672SJunchao Zhang } 21231c6ba672SJunchao Zhang PetscFunctionReturn(0); 21241c6ba672SJunchao Zhang } 2125dd5b3ca6SJunchao Zhang 2126dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 21279371c9d4SSatish Balay PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) { 2128eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype; 2129dd5b3ca6SJunchao Zhang 2130dd5b3ca6SJunchao Zhang PetscFunctionBegin; 2131dd5b3ca6SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 21329566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 21339566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 21349566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype)); 21359566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2136dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 21379566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2138dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 2139dd5b3ca6SJunchao Zhang } 2140dd5b3ca6SJunchao Zhang 2141157edd7aSVaclav Hapla /*@ 2142157edd7aSVaclav Hapla PetscSFConcatenate - concatenate multiple SFs into one 2143157edd7aSVaclav Hapla 2144157edd7aSVaclav Hapla Input Parameters: 2145157edd7aSVaclav Hapla + comm - the communicator 2146157edd7aSVaclav Hapla . nsfs - the number of input PetscSF 2147157edd7aSVaclav Hapla . sfs - the array of input PetscSF 2148157edd7aSVaclav Hapla . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE) 2149157edd7aSVaclav Hapla - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage 2150157edd7aSVaclav Hapla 2151157edd7aSVaclav Hapla Output Parameters: 2152157edd7aSVaclav Hapla . newsf - The resulting PetscSF 2153157edd7aSVaclav Hapla 2154157edd7aSVaclav Hapla Level: developer 2155157edd7aSVaclav Hapla 2156157edd7aSVaclav Hapla Notes: 2157157edd7aSVaclav Hapla The communicator of all SFs in sfs must be comm. 2158157edd7aSVaclav Hapla 2159157edd7aSVaclav Hapla The offsets in leafOffsets are added to the original leaf indices. 2160157edd7aSVaclav Hapla 2161157edd7aSVaclav Hapla If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well. 2162157edd7aSVaclav Hapla In this case, NULL is also passed as ilocal to the resulting SF. 2163157edd7aSVaclav Hapla 2164157edd7aSVaclav Hapla If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs. 2165157edd7aSVaclav Hapla In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2166157edd7aSVaclav Hapla 2167db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2168157edd7aSVaclav Hapla @*/ 21699371c9d4SSatish Balay PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf) { 2170157edd7aSVaclav Hapla PetscInt i, s, nLeaves, nRoots; 2171157edd7aSVaclav Hapla PetscInt *leafArrayOffsets; 2172157edd7aSVaclav Hapla PetscInt *ilocal_new; 2173157edd7aSVaclav Hapla PetscSFNode *iremote_new; 2174157edd7aSVaclav Hapla PetscInt *rootOffsets; 2175157edd7aSVaclav Hapla PetscBool all_ilocal_null = PETSC_FALSE; 2176157edd7aSVaclav Hapla PetscMPIInt rank; 2177157edd7aSVaclav Hapla 2178157edd7aSVaclav Hapla PetscFunctionBegin; 2179157edd7aSVaclav Hapla { 2180157edd7aSVaclav Hapla PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2181157edd7aSVaclav Hapla 21829566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &dummy)); 2183157edd7aSVaclav Hapla PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2184157edd7aSVaclav Hapla PetscValidPointer(sfs, 3); 2185157edd7aSVaclav Hapla for (i = 0; i < nsfs; i++) { 2186157edd7aSVaclav Hapla PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2187157edd7aSVaclav Hapla PetscCheckSameComm(dummy, 1, sfs[i], 3); 2188157edd7aSVaclav Hapla } 2189157edd7aSVaclav Hapla PetscValidLogicalCollectiveBool(dummy, shareRoots, 4); 2190157edd7aSVaclav Hapla if (leafOffsets) PetscValidIntPointer(leafOffsets, 5); 2191157edd7aSVaclav Hapla PetscValidPointer(newsf, 6); 21929566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&dummy)); 2193157edd7aSVaclav Hapla } 2194157edd7aSVaclav Hapla if (!nsfs) { 21959566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, newsf)); 21969566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2197157edd7aSVaclav Hapla PetscFunctionReturn(0); 2198157edd7aSVaclav Hapla } 21999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2200157edd7aSVaclav Hapla 22019566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nsfs + 1, &rootOffsets)); 2202157edd7aSVaclav Hapla if (shareRoots) { 22039566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2204157edd7aSVaclav Hapla if (PetscDefined(USE_DEBUG)) { 2205157edd7aSVaclav Hapla for (s = 1; s < nsfs; s++) { 2206157edd7aSVaclav Hapla PetscInt nr; 2207157edd7aSVaclav Hapla 22089566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2209157edd7aSVaclav Hapla PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots); 2210157edd7aSVaclav Hapla } 2211157edd7aSVaclav Hapla } 2212157edd7aSVaclav Hapla } else { 2213157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2214157edd7aSVaclav Hapla PetscInt nr; 2215157edd7aSVaclav Hapla 22169566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2217157edd7aSVaclav Hapla rootOffsets[s + 1] = rootOffsets[s] + nr; 2218157edd7aSVaclav Hapla } 2219157edd7aSVaclav Hapla nRoots = rootOffsets[nsfs]; 2220157edd7aSVaclav Hapla } 2221157edd7aSVaclav Hapla 2222157edd7aSVaclav Hapla /* Calculate leaf array offsets and automatic root offsets */ 22239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2224157edd7aSVaclav Hapla leafArrayOffsets[0] = 0; 2225157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2226157edd7aSVaclav Hapla PetscInt nl; 2227157edd7aSVaclav Hapla 22289566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2229157edd7aSVaclav Hapla leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2230157edd7aSVaclav Hapla } 2231157edd7aSVaclav Hapla nLeaves = leafArrayOffsets[nsfs]; 2232157edd7aSVaclav Hapla 2233157edd7aSVaclav Hapla if (!leafOffsets) { 2234157edd7aSVaclav Hapla all_ilocal_null = PETSC_TRUE; 2235157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2236157edd7aSVaclav Hapla const PetscInt *ilocal; 2237157edd7aSVaclav Hapla 22389566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2239157edd7aSVaclav Hapla if (ilocal) { 2240157edd7aSVaclav Hapla all_ilocal_null = PETSC_FALSE; 2241157edd7aSVaclav Hapla break; 2242157edd7aSVaclav Hapla } 2243157edd7aSVaclav Hapla } 2244157edd7aSVaclav Hapla PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2245157edd7aSVaclav Hapla } 2246157edd7aSVaclav Hapla 2247157edd7aSVaclav Hapla /* Renumber and concatenate local leaves */ 2248157edd7aSVaclav Hapla ilocal_new = NULL; 2249157edd7aSVaclav Hapla if (!all_ilocal_null) { 22509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2251157edd7aSVaclav Hapla for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2252157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2253157edd7aSVaclav Hapla const PetscInt *ilocal; 2254157edd7aSVaclav Hapla PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; 2255157edd7aSVaclav Hapla PetscInt i, nleaves_l; 2256157edd7aSVaclav Hapla 22579566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2258157edd7aSVaclav Hapla for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2259157edd7aSVaclav Hapla } 2260157edd7aSVaclav Hapla } 2261157edd7aSVaclav Hapla 2262157edd7aSVaclav Hapla /* Renumber and concatenate remote roots */ 22639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2264157edd7aSVaclav Hapla for (i = 0; i < nLeaves; i++) { 2265157edd7aSVaclav Hapla iremote_new[i].rank = -1; 2266157edd7aSVaclav Hapla iremote_new[i].index = -1; 2267157edd7aSVaclav Hapla } 2268157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) { 2269157edd7aSVaclav Hapla PetscInt i, nl, nr; 2270157edd7aSVaclav Hapla PetscSF tmp_sf; 2271157edd7aSVaclav Hapla const PetscSFNode *iremote; 2272157edd7aSVaclav Hapla PetscSFNode *tmp_rootdata; 2273157edd7aSVaclav Hapla PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; 2274157edd7aSVaclav Hapla 22759566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 22769566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &tmp_sf)); 2277157edd7aSVaclav Hapla /* create helper SF with contiguous leaves */ 22789566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 22799566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(tmp_sf)); 22809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2281157edd7aSVaclav Hapla for (i = 0; i < nr; i++) { 2282157edd7aSVaclav Hapla tmp_rootdata[i].index = i + rootOffsets[s]; 2283157edd7aSVaclav Hapla tmp_rootdata[i].rank = (PetscInt)rank; 2284157edd7aSVaclav Hapla } 22859566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 22869566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 22879566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&tmp_sf)); 22889566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp_rootdata)); 2289157edd7aSVaclav Hapla } 2290157edd7aSVaclav Hapla 2291157edd7aSVaclav Hapla /* Build the new SF */ 22929566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, newsf)); 22939566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 22949566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(*newsf)); 22959566063dSJacob Faibussowitsch PetscCall(PetscFree(rootOffsets)); 22969566063dSJacob Faibussowitsch PetscCall(PetscFree(leafArrayOffsets)); 2297157edd7aSVaclav Hapla PetscFunctionReturn(0); 2298157edd7aSVaclav Hapla } 2299