1*dd5b3ca6SJunchao Zhang 2*dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 3*dd5b3ca6SJunchao Zhang 4*dd5b3ca6SJunchao Zhang typedef PetscSFPack_Allgatherv PetscSFPack_Allgather; 5*dd5b3ca6SJunchao Zhang #define PetscSFPackGet_Allgather PetscSFPackGet_Allgatherv 6*dd5b3ca6SJunchao Zhang 7*dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used in Allgather on rank != 0, which is not a big deal */ 8*dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Allgather; 9*dd5b3ca6SJunchao Zhang 10*dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gather(PetscSF,MPI_Datatype,const void*,void*,MPI_Op); 11*dd5b3ca6SJunchao Zhang 12*dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Allgather(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 13*dd5b3ca6SJunchao Zhang { 14*dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 15*dd5b3ca6SJunchao Zhang PetscSFPack_Allgather link; 16*dd5b3ca6SJunchao Zhang PetscMPIInt sendcount; 17*dd5b3ca6SJunchao Zhang MPI_Comm comm; 18*dd5b3ca6SJunchao Zhang 19*dd5b3ca6SJunchao Zhang PetscFunctionBegin; 20*dd5b3ca6SJunchao Zhang ierr = PetscSFPackGet_Allgather(sf,unit,rootdata,&link);CHKERRQ(ierr); 21*dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 22*dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr); 23*dd5b3ca6SJunchao Zhang 24*dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 25*dd5b3ca6SJunchao Zhang ierr = MPIU_Iallgather(rootdata,sendcount,unit,leafdata,sendcount,unit,comm,&link->request);CHKERRQ(ierr); 26*dd5b3ca6SJunchao Zhang } else { 27*dd5b3ca6SJunchao Zhang /* Allgather to the leaf buffer and then add leaf buffer to rootdata */ 28*dd5b3ca6SJunchao Zhang if (!link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);} 29*dd5b3ca6SJunchao Zhang ierr = MPIU_Iallgather(rootdata,sendcount,unit,link->leaf,sendcount,unit,comm,&link->request);CHKERRQ(ierr); 30*dd5b3ca6SJunchao Zhang } 31*dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 32*dd5b3ca6SJunchao Zhang } 33*dd5b3ca6SJunchao Zhang 34*dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFBcastToZero_Allgather(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 35*dd5b3ca6SJunchao Zhang { 36*dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 37*dd5b3ca6SJunchao Zhang PetscSFPack_Allgather link; 38*dd5b3ca6SJunchao Zhang 39*dd5b3ca6SJunchao Zhang PetscFunctionBegin; 40*dd5b3ca6SJunchao Zhang ierr = PetscSFBcastAndOpBegin_Gather(sf,unit,rootdata,leafdata,MPIU_REPLACE);CHKERRQ(ierr); 41*dd5b3ca6SJunchao Zhang /* A simplified PetscSFBcastAndOpEnd_Allgatherv */ 42*dd5b3ca6SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 43*dd5b3ca6SJunchao Zhang ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr); 44*dd5b3ca6SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 45*dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 46*dd5b3ca6SJunchao Zhang } 47*dd5b3ca6SJunchao Zhang 48*dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Allgather(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 49*dd5b3ca6SJunchao Zhang { 50*dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 51*dd5b3ca6SJunchao Zhang PetscSFPack_Allgather link; 52*dd5b3ca6SJunchao Zhang PetscMPIInt rank,count,sendcount; 53*dd5b3ca6SJunchao Zhang PetscInt rstart; 54*dd5b3ca6SJunchao Zhang MPI_Comm comm; 55*dd5b3ca6SJunchao Zhang 56*dd5b3ca6SJunchao Zhang PetscFunctionBegin; 57*dd5b3ca6SJunchao Zhang ierr = PetscSFPackGet_Allgather(sf,unit,leafdata,&link);CHKERRQ(ierr); 58*dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 59*dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 60*dd5b3ca6SJunchao Zhang 61*dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 62*dd5b3ca6SJunchao Zhang /* REPLACE is only meaningful when all processes have the same leafdata to reduce. Therefore copy from local leafdata is fine */ 63*dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr); 64*dd5b3ca6SJunchao Zhang ierr = PetscMemcpy(rootdata,(const char*)leafdata+(size_t)rstart*link->unitbytes,(size_t)sf->nroots*link->unitbytes);CHKERRQ(ierr); 65*dd5b3ca6SJunchao Zhang } else { 66*dd5b3ca6SJunchao Zhang /* Reduce all leafdata on rank 0, then scatter the result to root buffer, then reduce root buffer to leafdata */ 67*dd5b3ca6SJunchao Zhang if (!rank && !link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);} 68*dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nleaves*link->bs,&count);CHKERRQ(ierr); 69*dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr); 70*dd5b3ca6SJunchao Zhang ierr = MPI_Reduce(leafdata,link->leaf,count,link->basicunit,op,0/*rank 0*/,comm);CHKERRQ(ierr); /* Must do reduce with MPI builltin datatype basicunit */ 71*dd5b3ca6SJunchao Zhang if (!link->root) {ierr = PetscMalloc(sf->nroots*link->unitbytes,&link->root);CHKERRQ(ierr);} /* Allocate root buffer */ 72*dd5b3ca6SJunchao Zhang ierr = MPIU_Iscatter(link->leaf,sendcount,unit,link->root,sendcount,unit,0/*rank 0*/,comm,&link->request);CHKERRQ(ierr); 73*dd5b3ca6SJunchao Zhang } 74*dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 75*dd5b3ca6SJunchao Zhang } 76*dd5b3ca6SJunchao Zhang 77*dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Allgather(PetscSF sf) 78*dd5b3ca6SJunchao Zhang { 79*dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 80*dd5b3ca6SJunchao Zhang PetscSF_Allgather *dat = (PetscSF_Allgather*)sf->data; 81*dd5b3ca6SJunchao Zhang 82*dd5b3ca6SJunchao Zhang PetscFunctionBegin; 83*dd5b3ca6SJunchao Zhang 84*dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv */ 85*dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 86*dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 87*dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 88*dd5b3ca6SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 89*dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Allgatherv; 90*dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 91*dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 92*dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 93*dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 94*dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 95*dd5b3ca6SJunchao Zhang 96*dd5b3ca6SJunchao Zhang /* Allgather stuff */ 97*dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Allgather; 98*dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Allgather; 99*dd5b3ca6SJunchao Zhang sf->ops->BcastToZero = PetscSFBcastToZero_Allgather; 100*dd5b3ca6SJunchao Zhang 101*dd5b3ca6SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 102*dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 103*dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 104*dd5b3ca6SJunchao Zhang } 105