xref: /petsc/src/vec/is/sf/impls/basic/allgather/sfallgather.c (revision 855db38d8495605a5e3dcabe2b88744316163d08)
1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
2dd5b3ca6SJunchao Zhang 
/* Alias: code in this file that calls PetscSFPackGet_Allgather() actually invokes PetscSFPackGet_Allgatherv() */
3dd5b3ca6SJunchao Zhang #define PetscSFPackGet_Allgather PetscSFPackGet_Allgatherv
4dd5b3ca6SJunchao Zhang 
5dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used in Allgather on rank != 0, which is not a big deal */
6dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Allgather;
7dd5b3ca6SJunchao Zhang 
/* Forward declaration of a routine not defined in this file; it is called by PetscSFBcastToZero_Allgather().
   NOTE(review): presumably implemented by the Gather SF type - confirm where the definition lives. */
8eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gather(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
9dd5b3ca6SJunchao Zhang 
10eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Allgather(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
11dd5b3ca6SJunchao Zhang {
12dd5b3ca6SJunchao Zhang   PetscErrorCode        ierr;
13eb02082bSJunchao Zhang   PetscSFPack           link;
14dd5b3ca6SJunchao Zhang   PetscMPIInt           sendcount;
15dd5b3ca6SJunchao Zhang   MPI_Comm              comm;
16*855db38dSJunchao Zhang   const void            *rootbuf_mpi; /* buffer used by MPI */
17*855db38dSJunchao Zhang   void                  *leafbuf_mpi;
18*855db38dSJunchao Zhang   PetscMemType          rootmtype_mpi,leafmtype_mpi;
19dd5b3ca6SJunchao Zhang 
20dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
21eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Allgather(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr);
22dd5b3ca6SJunchao Zhang   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
23dd5b3ca6SJunchao Zhang   ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr);
24*855db38dSJunchao Zhang   ierr = PetscSFBcastPrepareMPIBuffers_Allgatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr);
25*855db38dSJunchao Zhang   ierr = MPIU_Iallgather(rootbuf_mpi,sendcount,unit,leafbuf_mpi,sendcount,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype_mpi]);CHKERRQ(ierr);
26*855db38dSJunchao Zhang   PetscFunctionReturn(0);
27*855db38dSJunchao Zhang }
28*855db38dSJunchao Zhang 
29*855db38dSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Allgather(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
30*855db38dSJunchao Zhang {
31*855db38dSJunchao Zhang   PetscErrorCode        ierr;
32*855db38dSJunchao Zhang   PetscSFPack           link;
33*855db38dSJunchao Zhang   PetscMPIInt           sendcount;
34*855db38dSJunchao Zhang   PetscInt              rstart;
35*855db38dSJunchao Zhang   MPI_Comm              comm;
36*855db38dSJunchao Zhang   const void            *leafbuf_mpi;
37*855db38dSJunchao Zhang   void                  *rootbuf_mpi;
38*855db38dSJunchao Zhang   PetscMemType          leafmtype_mpi,rootmtype_mpi;
39*855db38dSJunchao Zhang 
40*855db38dSJunchao Zhang   PetscFunctionBegin;
41*855db38dSJunchao Zhang   ierr = PetscSFPackGet_Allgather(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr);
42*855db38dSJunchao Zhang   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
43dd5b3ca6SJunchao Zhang 
44dd5b3ca6SJunchao Zhang   if (op == MPIU_REPLACE) {
45*855db38dSJunchao Zhang     /* REPLACE is only meaningful when all processes have the same leafdata to reduce. Therefore copy from local leafdata is fine */
46*855db38dSJunchao Zhang     ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr);
47*855db38dSJunchao Zhang     ierr = PetscMemcpyWithMemType(rootmtype,leafmtype,rootdata,(const char*)leafdata+(size_t)rstart*link->unitbytes,(size_t)sf->nroots*link->unitbytes);CHKERRQ(ierr);
48dd5b3ca6SJunchao Zhang   } else {
49*855db38dSJunchao Zhang     ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr);
50*855db38dSJunchao Zhang     ierr = PetscSFReducePrepareMPIBuffers_Allgatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr);
51*855db38dSJunchao Zhang     ierr = MPIU_Iscatter(leafbuf_mpi,sendcount,unit,rootbuf_mpi,sendcount,unit,0/*rank 0*/,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype_mpi]);CHKERRQ(ierr);
52dd5b3ca6SJunchao Zhang   }
53dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
54dd5b3ca6SJunchao Zhang }
55dd5b3ca6SJunchao Zhang 
56eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastToZero_Allgather(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata)
57dd5b3ca6SJunchao Zhang {
58dd5b3ca6SJunchao Zhang   PetscErrorCode        ierr;
59eb02082bSJunchao Zhang   PetscSFPack           link;
60*855db38dSJunchao Zhang   PetscMPIInt           rank;
61dd5b3ca6SJunchao Zhang 
62dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
63eb02082bSJunchao Zhang   ierr = PetscSFBcastAndOpBegin_Gather(sf,unit,rootmtype,rootdata,leafmtype,leafdata,MPIU_REPLACE);CHKERRQ(ierr);
64eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
65*855db38dSJunchao Zhang   ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
66*855db38dSJunchao Zhang   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
67*855db38dSJunchao Zhang   if (!rank && leafmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) {
68*855db38dSJunchao Zhang     ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_DEVICE,PETSC_MEMTYPE_HOST,leafdata,link->leafbuf[PETSC_MEMTYPE_HOST],link->leafbuflen*link->unitbytes);CHKERRQ(ierr);
69*855db38dSJunchao Zhang   }
70eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
71dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
72dd5b3ca6SJunchao Zhang }
73dd5b3ca6SJunchao Zhang 
74dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Allgather(PetscSF sf)
75dd5b3ca6SJunchao Zhang {
76dd5b3ca6SJunchao Zhang   PetscErrorCode    ierr;
77dd5b3ca6SJunchao Zhang   PetscSF_Allgather *dat = (PetscSF_Allgather*)sf->data;
78dd5b3ca6SJunchao Zhang 
79dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
80dd5b3ca6SJunchao Zhang 
81dd5b3ca6SJunchao Zhang   /* Inherit from Allgatherv */
82dd5b3ca6SJunchao Zhang   sf->ops->Reset           = PetscSFReset_Allgatherv;
83dd5b3ca6SJunchao Zhang   sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
84dd5b3ca6SJunchao Zhang   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Allgatherv;
85dd5b3ca6SJunchao Zhang   sf->ops->ReduceEnd       = PetscSFReduceEnd_Allgatherv;
86dd5b3ca6SJunchao Zhang   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Allgatherv;
87dd5b3ca6SJunchao Zhang   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
88dd5b3ca6SJunchao Zhang   sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;
89dd5b3ca6SJunchao Zhang   sf->ops->CreateLocalSF   = PetscSFCreateLocalSF_Allgatherv;
90dd5b3ca6SJunchao Zhang   sf->ops->GetGraph        = PetscSFGetGraph_Allgatherv;
91dd5b3ca6SJunchao Zhang   sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
92dd5b3ca6SJunchao Zhang 
93dd5b3ca6SJunchao Zhang   /* Allgather stuff */
94dd5b3ca6SJunchao Zhang   sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Allgather;
95dd5b3ca6SJunchao Zhang   sf->ops->ReduceBegin     = PetscSFReduceBegin_Allgather;
96dd5b3ca6SJunchao Zhang   sf->ops->BcastToZero     = PetscSFBcastToZero_Allgather;
97dd5b3ca6SJunchao Zhang 
98dd5b3ca6SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
99dd5b3ca6SJunchao Zhang   sf->data = (void*)dat;
100dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
101dd5b3ca6SJunchao Zhang }
102