1dd5b3ca6SJunchao Zhang 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 3dd5b3ca6SJunchao Zhang 4eb02082bSJunchao Zhang #define PetscSFPackGet_Gatherv PetscSFPackGet_Allgatherv 5eb02082bSJunchao Zhang 6eb02082bSJunchao Zhang /* Reuse the type. The difference is some fields (displs, recvcounts) are only significant 7eb02082bSJunchao Zhang on rank 0 in Gatherv. On other ranks they are harmless NULL. 8eb02082bSJunchao Zhang */ 9eb02082bSJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Gatherv; 10eb02082bSJunchao Zhang 11eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 12dd5b3ca6SJunchao Zhang { 13dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 14eb02082bSJunchao Zhang PetscSFPack link; 15*855db38dSJunchao Zhang PetscMPIInt sendcount; 16dd5b3ca6SJunchao Zhang MPI_Comm comm; 17dd5b3ca6SJunchao Zhang PetscSF_Gatherv *dat = (PetscSF_Gatherv*)sf->data; 18*855db38dSJunchao Zhang const void *rootbuf_mpi; /* buffer used by MPI */ 19*855db38dSJunchao Zhang void *leafbuf_mpi; 20*855db38dSJunchao Zhang PetscMemType rootmtype_mpi,leafmtype_mpi; 21dd5b3ca6SJunchao Zhang 22dd5b3ca6SJunchao Zhang PetscFunctionBegin; 23eb02082bSJunchao Zhang ierr = PetscSFPackGet_Gatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 24dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 25dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr); 26*855db38dSJunchao Zhang ierr = PetscSFBcastPrepareMPIBuffers_Allgatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr); 27*855db38dSJunchao Zhang ierr = MPIU_Igatherv(rootbuf_mpi,sendcount,unit,leafbuf_mpi,dat->recvcounts,dat->displs,unit,0/*rank 0*/,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype_mpi]);CHKERRQ(ierr); 28*855db38dSJunchao Zhang PetscFunctionReturn(0); 29dd5b3ca6SJunchao Zhang } 30dd5b3ca6SJunchao Zhang 31*855db38dSJunchao Zhang /* 32*855db38dSJunchao Zhang Prepare the rootbuf, leafbuf etc used by MPI in PetscSFReduceBegin. 33*855db38dSJunchao Zhang 34*855db38dSJunchao Zhang Input Arguments: 35*855db38dSJunchao Zhang + sf - the start forest 36*855db38dSJunchao Zhang . link - the link PetscSFReduceBegin is currently using 37*855db38dSJunchao Zhang - op - the reduction op 38*855db38dSJunchao Zhang 39*855db38dSJunchao Zhang Output Arguments: 40*855db38dSJunchao Zhang +rootmtype_mpi - memtype of rootbuf_mpi 41*855db38dSJunchao Zhang .rootbuf_mpi - root buffer used by MPI in the following MPI call 42*855db38dSJunchao Zhang .leafmtype_mpi - memtype of leafbuf_mpi 43*855db38dSJunchao Zhang -leafbuf_mpi - leaf buffer used by MPI in the following MPI call 44*855db38dSJunchao Zhang */ 45*855db38dSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReducePrepareMPIBuffers_Gatherv(PetscSF sf,PetscSFPack link,MPI_Op op,PetscMemType *rootmtype_mpi,void **rootbuf_mpi,PetscMemType *leafmtype_mpi,const void **leafbuf_mpi) 46*855db38dSJunchao Zhang { 47*855db38dSJunchao Zhang PetscErrorCode ierr; 48*855db38dSJunchao Zhang PetscMPIInt rank; 49*855db38dSJunchao Zhang MPI_Comm comm; 50*855db38dSJunchao Zhang 51*855db38dSJunchao Zhang PetscFunctionBegin; 52*855db38dSJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 53*855db38dSJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 54*855db38dSJunchao Zhang 55*855db38dSJunchao Zhang if (link->leafmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { /* Need to copy leafdata to leafbuf on every rank */ 56*855db38dSJunchao Zhang if (!rank && !link->leafbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);} 57*855db38dSJunchao Zhang ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_HOST,PETSC_MEMTYPE_DEVICE,link->leafbuf[PETSC_MEMTYPE_HOST],link->lkey,link->leafbuflen*link->unitbytes);CHKERRQ(ierr); 58*855db38dSJunchao Zhang *leafmtype_mpi = PETSC_MEMTYPE_HOST; 59*855db38dSJunchao Zhang *leafbuf_mpi = link->leafbuf[*leafmtype_mpi]; 60*855db38dSJunchao Zhang } else { 61*855db38dSJunchao Zhang *leafmtype_mpi = link->leafmtype; 62*855db38dSJunchao Zhang *leafbuf_mpi = (char*)link->lkey; 63*855db38dSJunchao Zhang } 64*855db38dSJunchao Zhang 65*855db38dSJunchao Zhang if (link->rootmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { /* If rootdata is on device but no gpu-aware mpi, we need a rootbuf on host to receive reduced data */ 66*855db38dSJunchao Zhang if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);} 67*855db38dSJunchao Zhang *rootbuf_mpi = link->rootbuf[PETSC_MEMTYPE_HOST]; 68*855db38dSJunchao Zhang *rootmtype_mpi = PETSC_MEMTYPE_HOST; 69*855db38dSJunchao Zhang } else if (op == MPIU_REPLACE) { /* Directly use rootdata's memory to receive reduced data. No intermediate buffer needed. */ 70*855db38dSJunchao Zhang *rootbuf_mpi = (char *)link->rkey; 71*855db38dSJunchao Zhang *rootmtype_mpi = link->rootmtype; 72*855db38dSJunchao Zhang } else { /* op is a reduction. Have to allocate a buffer aside rootdata to apply it. The buffer is either on host or device, depending on where rootdata is. */ 73*855db38dSJunchao Zhang if (!link->rootbuf[link->rootmtype]) {ierr = PetscMallocWithMemType(link->rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[link->rootmtype]);CHKERRQ(ierr);} 74*855db38dSJunchao Zhang *rootbuf_mpi = link->rootbuf[link->rootmtype]; 75*855db38dSJunchao Zhang *rootmtype_mpi = link->rootmtype; 76*855db38dSJunchao Zhang } 77dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 78dd5b3ca6SJunchao Zhang } 79dd5b3ca6SJunchao Zhang 80eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 81dd5b3ca6SJunchao Zhang { 82dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 83eb02082bSJunchao Zhang PetscSFPack link; 84dd5b3ca6SJunchao Zhang PetscMPIInt recvcount; 85dd5b3ca6SJunchao Zhang MPI_Comm comm; 86dd5b3ca6SJunchao Zhang PetscSF_Gatherv *dat = (PetscSF_Gatherv*)sf->data; 87*855db38dSJunchao Zhang const void *leafbuf_mpi; 88*855db38dSJunchao Zhang void *rootbuf_mpi; 89*855db38dSJunchao Zhang PetscMemType leafmtype_mpi,rootmtype_mpi; 90dd5b3ca6SJunchao Zhang 91dd5b3ca6SJunchao Zhang PetscFunctionBegin; 92eb02082bSJunchao Zhang ierr = PetscSFPackGet_Gatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 93dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 94dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&recvcount);CHKERRQ(ierr); 95*855db38dSJunchao Zhang ierr = PetscSFReducePrepareMPIBuffers_Gatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr); 96*855db38dSJunchao Zhang ierr = MPIU_Iscatterv(leafbuf_mpi,dat->recvcounts,dat->displs,unit,rootbuf_mpi,recvcount,unit,0,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype_mpi]);CHKERRQ(ierr); 97dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 98dd5b3ca6SJunchao Zhang } 99dd5b3ca6SJunchao Zhang 100eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 101dd5b3ca6SJunchao Zhang { 102dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 103dd5b3ca6SJunchao Zhang 104dd5b3ca6SJunchao Zhang PetscFunctionBegin; 105dd5b3ca6SJunchao Zhang /* In Gatherv, each root only has one leaf. So we just need to bcast rootdata to leafupdate and then reduce leafdata to rootdata */ 106dd5b3ca6SJunchao Zhang ierr = PetscSFBcastAndOpBegin(sf,unit,rootdata,leafupdate,MPIU_REPLACE);CHKERRQ(ierr); 107dd5b3ca6SJunchao Zhang ierr = PetscSFBcastAndOpEnd(sf,unit,rootdata,leafupdate,MPIU_REPLACE);CHKERRQ(ierr); 108dd5b3ca6SJunchao Zhang ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 109dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 110dd5b3ca6SJunchao Zhang } 111dd5b3ca6SJunchao Zhang 112dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Gatherv(PetscSF sf) 113dd5b3ca6SJunchao Zhang { 114dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 115dd5b3ca6SJunchao Zhang PetscSF_Gatherv *dat = (PetscSF_Gatherv*)sf->data; 116dd5b3ca6SJunchao Zhang 117dd5b3ca6SJunchao Zhang PetscFunctionBegin; 118dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv */ 119dd5b3ca6SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgatherv; 120dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 121dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 122dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 123dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 124dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 125dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 126dd5b3ca6SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 127dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 128dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 129dd5b3ca6SJunchao Zhang 130dd5b3ca6SJunchao Zhang /* Gatherv stuff */ 131dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Gatherv; 132dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Gatherv; 133dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 134dd5b3ca6SJunchao Zhang 135dd5b3ca6SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 136dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 137dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 138dd5b3ca6SJunchao Zhang } 139