xref: /petsc/src/vec/is/sf/impls/basic/gatherv/sfgatherv.c (revision 855db38d8495605a5e3dcabe2b88744316163d08)
1dd5b3ca6SJunchao Zhang 
2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>
3dd5b3ca6SJunchao Zhang 
4eb02082bSJunchao Zhang #define PetscSFPackGet_Gatherv PetscSFPackGet_Allgatherv
5eb02082bSJunchao Zhang 
6eb02082bSJunchao Zhang /* Reuse the type. The difference is some fields (displs, recvcounts) are only significant
7eb02082bSJunchao Zhang    on rank 0 in Gatherv. On other ranks they are harmless NULL.
8eb02082bSJunchao Zhang  */
9eb02082bSJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Gatherv;
10eb02082bSJunchao Zhang 
11eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
12dd5b3ca6SJunchao Zhang {
13dd5b3ca6SJunchao Zhang   PetscErrorCode       ierr;
14eb02082bSJunchao Zhang   PetscSFPack          link;
15*855db38dSJunchao Zhang   PetscMPIInt          sendcount;
16dd5b3ca6SJunchao Zhang   MPI_Comm             comm;
17dd5b3ca6SJunchao Zhang   PetscSF_Gatherv      *dat = (PetscSF_Gatherv*)sf->data;
18*855db38dSJunchao Zhang   const void           *rootbuf_mpi; /* buffer used by MPI */
19*855db38dSJunchao Zhang   void                 *leafbuf_mpi;
20*855db38dSJunchao Zhang   PetscMemType         rootmtype_mpi,leafmtype_mpi;
21dd5b3ca6SJunchao Zhang 
22dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
23eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Gatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr);
24dd5b3ca6SJunchao Zhang   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
25dd5b3ca6SJunchao Zhang   ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr);
26*855db38dSJunchao Zhang   ierr = PetscSFBcastPrepareMPIBuffers_Allgatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr);
27*855db38dSJunchao Zhang   ierr = MPIU_Igatherv(rootbuf_mpi,sendcount,unit,leafbuf_mpi,dat->recvcounts,dat->displs,unit,0/*rank 0*/,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype_mpi]);CHKERRQ(ierr);
28*855db38dSJunchao Zhang   PetscFunctionReturn(0);
29dd5b3ca6SJunchao Zhang }
30dd5b3ca6SJunchao Zhang 
31*855db38dSJunchao Zhang /*
32*855db38dSJunchao Zhang   Prepare the rootbuf, leafbuf etc used by MPI in PetscSFReduceBegin.
33*855db38dSJunchao Zhang 
34*855db38dSJunchao Zhang Input Arguments:
35*855db38dSJunchao Zhang + sf    - the start forest
36*855db38dSJunchao Zhang . link  - the link PetscSFReduceBegin is currently using
37*855db38dSJunchao Zhang - op    - the reduction op
38*855db38dSJunchao Zhang 
39*855db38dSJunchao Zhang Output Arguments:
40*855db38dSJunchao Zhang +rootmtype_mpi  - memtype of rootbuf_mpi
41*855db38dSJunchao Zhang .rootbuf_mpi    - root buffer used by MPI in the following MPI call
42*855db38dSJunchao Zhang .leafmtype_mpi  - memtype of leafbuf_mpi
43*855db38dSJunchao Zhang -leafbuf_mpi    - leaf buffer used by MPI in the following MPI call
44*855db38dSJunchao Zhang */
45*855db38dSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReducePrepareMPIBuffers_Gatherv(PetscSF sf,PetscSFPack link,MPI_Op op,PetscMemType *rootmtype_mpi,void **rootbuf_mpi,PetscMemType *leafmtype_mpi,const void **leafbuf_mpi)
46*855db38dSJunchao Zhang {
47*855db38dSJunchao Zhang   PetscErrorCode         ierr;
48*855db38dSJunchao Zhang   PetscMPIInt            rank;
49*855db38dSJunchao Zhang   MPI_Comm               comm;
50*855db38dSJunchao Zhang 
51*855db38dSJunchao Zhang   PetscFunctionBegin;
52*855db38dSJunchao Zhang   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
53*855db38dSJunchao Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
54*855db38dSJunchao Zhang 
55*855db38dSJunchao Zhang   if (link->leafmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { /* Need to copy leafdata to leafbuf on every rank */
56*855db38dSJunchao Zhang     if (!rank && !link->leafbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);}
57*855db38dSJunchao Zhang     ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_HOST,PETSC_MEMTYPE_DEVICE,link->leafbuf[PETSC_MEMTYPE_HOST],link->lkey,link->leafbuflen*link->unitbytes);CHKERRQ(ierr);
58*855db38dSJunchao Zhang     *leafmtype_mpi = PETSC_MEMTYPE_HOST;
59*855db38dSJunchao Zhang     *leafbuf_mpi   = link->leafbuf[*leafmtype_mpi];
60*855db38dSJunchao Zhang   } else {
61*855db38dSJunchao Zhang     *leafmtype_mpi = link->leafmtype;
62*855db38dSJunchao Zhang     *leafbuf_mpi   = (char*)link->lkey;
63*855db38dSJunchao Zhang   }
64*855db38dSJunchao Zhang 
65*855db38dSJunchao Zhang   if (link->rootmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) {  /* If rootdata is on device but no gpu-aware mpi, we need a rootbuf on host to receive reduced data */
66*855db38dSJunchao Zhang     if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);}
67*855db38dSJunchao Zhang     *rootbuf_mpi   = link->rootbuf[PETSC_MEMTYPE_HOST];
68*855db38dSJunchao Zhang     *rootmtype_mpi = PETSC_MEMTYPE_HOST;
69*855db38dSJunchao Zhang   } else if (op == MPIU_REPLACE) { /* Directly use rootdata's memory to receive reduced data. No intermediate buffer needed. */
70*855db38dSJunchao Zhang     *rootbuf_mpi   = (char *)link->rkey;
71*855db38dSJunchao Zhang     *rootmtype_mpi = link->rootmtype;
72*855db38dSJunchao Zhang   } else { /* op is a reduction. Have to allocate a buffer aside rootdata to apply it. The buffer is either on host or device, depending on where rootdata is. */
73*855db38dSJunchao Zhang     if (!link->rootbuf[link->rootmtype]) {ierr = PetscMallocWithMemType(link->rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[link->rootmtype]);CHKERRQ(ierr);}
74*855db38dSJunchao Zhang     *rootbuf_mpi   = link->rootbuf[link->rootmtype];
75*855db38dSJunchao Zhang     *rootmtype_mpi = link->rootmtype;
76*855db38dSJunchao Zhang   }
77dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
78dd5b3ca6SJunchao Zhang }
79dd5b3ca6SJunchao Zhang 
80eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
81dd5b3ca6SJunchao Zhang {
82dd5b3ca6SJunchao Zhang   PetscErrorCode       ierr;
83eb02082bSJunchao Zhang   PetscSFPack          link;
84dd5b3ca6SJunchao Zhang   PetscMPIInt          recvcount;
85dd5b3ca6SJunchao Zhang   MPI_Comm             comm;
86dd5b3ca6SJunchao Zhang   PetscSF_Gatherv      *dat = (PetscSF_Gatherv*)sf->data;
87*855db38dSJunchao Zhang   const void           *leafbuf_mpi;
88*855db38dSJunchao Zhang   void                 *rootbuf_mpi;
89*855db38dSJunchao Zhang   PetscMemType         leafmtype_mpi,rootmtype_mpi;
90dd5b3ca6SJunchao Zhang 
91dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
92eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Gatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr);
93dd5b3ca6SJunchao Zhang   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
94dd5b3ca6SJunchao Zhang   ierr = PetscMPIIntCast(sf->nroots,&recvcount);CHKERRQ(ierr);
95*855db38dSJunchao Zhang   ierr = PetscSFReducePrepareMPIBuffers_Gatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr);
96*855db38dSJunchao Zhang   ierr = MPIU_Iscatterv(leafbuf_mpi,dat->recvcounts,dat->displs,unit,rootbuf_mpi,recvcount,unit,0,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype_mpi]);CHKERRQ(ierr);
97dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
98dd5b3ca6SJunchao Zhang }
99dd5b3ca6SJunchao Zhang 
100eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
101dd5b3ca6SJunchao Zhang {
102dd5b3ca6SJunchao Zhang   PetscErrorCode      ierr;
103dd5b3ca6SJunchao Zhang 
104dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
105dd5b3ca6SJunchao Zhang   /* In Gatherv, each root only has one leaf. So we just need to bcast rootdata to leafupdate and then reduce leafdata to rootdata */
106dd5b3ca6SJunchao Zhang   ierr = PetscSFBcastAndOpBegin(sf,unit,rootdata,leafupdate,MPIU_REPLACE);CHKERRQ(ierr);
107dd5b3ca6SJunchao Zhang   ierr = PetscSFBcastAndOpEnd(sf,unit,rootdata,leafupdate,MPIU_REPLACE);CHKERRQ(ierr);
108dd5b3ca6SJunchao Zhang   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
109dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
110dd5b3ca6SJunchao Zhang }
111dd5b3ca6SJunchao Zhang 
112dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Gatherv(PetscSF sf)
113dd5b3ca6SJunchao Zhang {
114dd5b3ca6SJunchao Zhang   PetscErrorCode  ierr;
115dd5b3ca6SJunchao Zhang   PetscSF_Gatherv *dat = (PetscSF_Gatherv*)sf->data;
116dd5b3ca6SJunchao Zhang 
117dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
118dd5b3ca6SJunchao Zhang   /* Inherit from Allgatherv */
119dd5b3ca6SJunchao Zhang   sf->ops->SetUp           = PetscSFSetUp_Allgatherv;
120dd5b3ca6SJunchao Zhang   sf->ops->Reset           = PetscSFReset_Allgatherv;
121dd5b3ca6SJunchao Zhang   sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
122dd5b3ca6SJunchao Zhang   sf->ops->GetGraph        = PetscSFGetGraph_Allgatherv;
123dd5b3ca6SJunchao Zhang   sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
124dd5b3ca6SJunchao Zhang   sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;
125dd5b3ca6SJunchao Zhang   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Allgatherv;
126dd5b3ca6SJunchao Zhang   sf->ops->ReduceEnd       = PetscSFReduceEnd_Allgatherv;
127dd5b3ca6SJunchao Zhang   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
128dd5b3ca6SJunchao Zhang   sf->ops->CreateLocalSF   = PetscSFCreateLocalSF_Allgatherv;
129dd5b3ca6SJunchao Zhang 
130dd5b3ca6SJunchao Zhang   /* Gatherv stuff */
131dd5b3ca6SJunchao Zhang   sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Gatherv;
132dd5b3ca6SJunchao Zhang   sf->ops->ReduceBegin     = PetscSFReduceBegin_Gatherv;
133dd5b3ca6SJunchao Zhang   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;
134dd5b3ca6SJunchao Zhang 
135dd5b3ca6SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
136dd5b3ca6SJunchao Zhang   sf->data = (void*)dat;
137dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
138dd5b3ca6SJunchao Zhang }
139