xref: /petsc/src/vec/is/sf/impls/basic/sfmpi.c (revision 5c0db29a52f0e0a315a431c2b5d53138ecf7c054)
171438e86SJunchao Zhang /* Mainly for MPI_Isend in SFBASIC. Once SFNEIGHBOR, SFALLGHATERV etc have a persistent version,
271438e86SJunchao Zhang    we can also do abstractions like Prepare/StartCommunication.
371438e86SJunchao Zhang */
471438e86SJunchao Zhang 
571438e86SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
671438e86SJunchao Zhang 
771438e86SJunchao Zhang /* Start MPI requests. If use non-GPU aware MPI, we might need to copy data from device buf to host buf */
8d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFLinkStartRequests_MPI(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
9d71ae5a4SJacob Faibussowitsch {
1071438e86SJunchao Zhang   PetscMPIInt    nreqs;
1171438e86SJunchao Zhang   MPI_Request   *reqs = NULL;
1271438e86SJunchao Zhang   PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
1371438e86SJunchao Zhang   PetscInt       buflen;
1471438e86SJunchao Zhang 
1571438e86SJunchao Zhang   PetscFunctionBegin;
1671438e86SJunchao Zhang   buflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE];
1771438e86SJunchao Zhang   if (buflen) {
1871438e86SJunchao Zhang     if (direction == PETSCSF_ROOT2LEAF) {
1971438e86SJunchao Zhang       nreqs = sf->nleafreqs;
209566063dSJacob Faibussowitsch       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &reqs));
2171438e86SJunchao Zhang     } else { /* leaf to root */
2271438e86SJunchao Zhang       nreqs = bas->nrootreqs;
239566063dSJacob Faibussowitsch       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &reqs, NULL));
2471438e86SJunchao Zhang     }
259566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Startall_irecv(buflen, link->unit, nreqs, reqs));
2671438e86SJunchao Zhang   }
2771438e86SJunchao Zhang 
2871438e86SJunchao Zhang   buflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE];
2971438e86SJunchao Zhang   if (buflen) {
3071438e86SJunchao Zhang     if (direction == PETSCSF_ROOT2LEAF) {
3171438e86SJunchao Zhang       nreqs = bas->nrootreqs;
329566063dSJacob Faibussowitsch       PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /*device2host before sending */));
339566063dSJacob Faibussowitsch       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &reqs, NULL));
3471438e86SJunchao Zhang     } else { /* leaf to root */
3571438e86SJunchao Zhang       nreqs = sf->nleafreqs;
369566063dSJacob Faibussowitsch       PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE));
379566063dSJacob Faibussowitsch       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &reqs));
3871438e86SJunchao Zhang     }
399566063dSJacob Faibussowitsch     PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, direction));
409566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Startall_isend(buflen, link->unit, nreqs, reqs));
4171438e86SJunchao Zhang   }
423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4371438e86SJunchao Zhang }
4471438e86SJunchao Zhang 
45d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFLinkWaitRequests_MPI(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
46d71ae5a4SJacob Faibussowitsch {
4771438e86SJunchao Zhang   PetscSF_Basic     *bas           = (PetscSF_Basic *)sf->data;
4871438e86SJunchao Zhang   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
4971438e86SJunchao Zhang   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;
5071438e86SJunchao Zhang 
5171438e86SJunchao Zhang   PetscFunctionBegin;
529566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE));
539566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE));
5471438e86SJunchao Zhang   if (direction == PETSCSF_ROOT2LEAF) {
559566063dSJacob Faibussowitsch     PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_FALSE /* host2device after recving */));
5671438e86SJunchao Zhang   } else {
579566063dSJacob Faibussowitsch     PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_FALSE));
5871438e86SJunchao Zhang   }
593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6071438e86SJunchao Zhang }
6171438e86SJunchao Zhang 
62715b587bSJunchao Zhang #if defined(PETSC_HAVE_MPIX_STREAM)
63715b587bSJunchao Zhang // issue MPIX_Isend/Irecv_enqueue()
64715b587bSJunchao Zhang static PetscErrorCode PetscSFLinkStartEnqueue_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
65715b587bSJunchao Zhang {
66715b587bSJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
67715b587bSJunchao Zhang   PetscInt           i, j, cnt, nrootranks, ndrootranks, nleafranks, ndleafranks;
68715b587bSJunchao Zhang   const PetscInt    *rootoffset, *leafoffset;
69715b587bSJunchao Zhang   MPI_Aint           disp;
70715b587bSJunchao Zhang   MPI_Comm           stream_comm   = sf->stream_comm;
71715b587bSJunchao Zhang   MPI_Datatype       unit          = link->unit;
72715b587bSJunchao Zhang   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
73715b587bSJunchao Zhang   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;
74715b587bSJunchao Zhang 
75715b587bSJunchao Zhang   PetscFunctionBegin;
76715b587bSJunchao Zhang   if (bas->rootbuflen[PETSCSF_REMOTE]) {
77715b587bSJunchao Zhang     PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
78715b587bSJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT) {
79715b587bSJunchao Zhang       for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
80715b587bSJunchao Zhang         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
81715b587bSJunchao Zhang         cnt  = rootoffset[i + 1] - rootoffset[i];
82715b587bSJunchao Zhang         PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
83715b587bSJunchao Zhang       }
84715b587bSJunchao Zhang     } else { // PETSCSF_ROOT2LEAF
85715b587bSJunchao Zhang       for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
86715b587bSJunchao Zhang         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
87715b587bSJunchao Zhang         cnt  = rootoffset[i + 1] - rootoffset[i];
88715b587bSJunchao Zhang         // no need to sync the gpu stream!
89715b587bSJunchao Zhang         PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
90715b587bSJunchao Zhang       }
91715b587bSJunchao Zhang     }
92715b587bSJunchao Zhang   }
93715b587bSJunchao Zhang 
94715b587bSJunchao Zhang   if (sf->leafbuflen[PETSCSF_REMOTE]) {
95715b587bSJunchao Zhang     PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
96715b587bSJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT) {
97715b587bSJunchao Zhang       for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
98715b587bSJunchao Zhang         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
99715b587bSJunchao Zhang         cnt  = leafoffset[i + 1] - leafoffset[i];
100715b587bSJunchao Zhang         // no need to sync the gpu stream!
101715b587bSJunchao Zhang         PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
102715b587bSJunchao Zhang       }
103715b587bSJunchao Zhang     } else { // PETSCSF_ROOT2LEAF
104715b587bSJunchao Zhang       for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
105715b587bSJunchao Zhang         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
106715b587bSJunchao Zhang         cnt  = leafoffset[i + 1] - leafoffset[i];
107715b587bSJunchao Zhang         PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
108715b587bSJunchao Zhang       }
109715b587bSJunchao Zhang     }
110715b587bSJunchao Zhang   }
111715b587bSJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
112715b587bSJunchao Zhang }
113715b587bSJunchao Zhang 
114715b587bSJunchao Zhang static PetscErrorCode PetscSFLinkWaitEnqueue_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
115715b587bSJunchao Zhang {
116715b587bSJunchao Zhang   PetscSF_Basic     *bas           = (PetscSF_Basic *)sf->data;
117715b587bSJunchao Zhang   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
118715b587bSJunchao Zhang   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;
119715b587bSJunchao Zhang 
120715b587bSJunchao Zhang   PetscFunctionBegin;
121715b587bSJunchao Zhang   PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE));
122715b587bSJunchao Zhang   PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE));
123715b587bSJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
124715b587bSJunchao Zhang }
125715b587bSJunchao Zhang #endif
126715b587bSJunchao Zhang 
12771438e86SJunchao Zhang /*
12871438e86SJunchao Zhang    The routine Creates a communication link for the given operation. It first looks up its link cache. If
12971438e86SJunchao Zhang    there is a free & suitable one, it uses it. Otherwise it creates a new one.
13071438e86SJunchao Zhang 
13171438e86SJunchao Zhang    A link contains buffers and MPI requests for send/recv. It also contains pack/unpack routines to pack/unpack
13271438e86SJunchao Zhang    root/leafdata to/from these buffers. Buffers are allocated at our discretion. When we find root/leafata
13371438e86SJunchao Zhang    can be directly passed to MPI, we won't allocate them. Even we allocate buffers, we only allocate
13471438e86SJunchao Zhang    those that are needed by the given `sfop` and `op`, in other words, we do lazy memory-allocation.
13571438e86SJunchao Zhang 
13671438e86SJunchao Zhang    The routine also allocates buffers on CPU when one does not use gpu-aware MPI but data is on GPU.
13771438e86SJunchao Zhang 
13871438e86SJunchao Zhang    In SFBasic, MPI requests are persistent. They are init'ed until we try to get requests from a link.
13971438e86SJunchao Zhang 
14071438e86SJunchao Zhang    The routine is shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs and
14171438e86SJunchao Zhang    need pack/unpack data.
14271438e86SJunchao Zhang */
143d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFLinkCreate_MPI(PetscSF sf, MPI_Datatype unit, PetscMemType xrootmtype, const void *rootdata, PetscMemType xleafmtype, const void *leafdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *mylink)
144d71ae5a4SJacob Faibussowitsch {
14571438e86SJunchao Zhang   PetscSF_Basic   *bas = (PetscSF_Basic *)sf->data;
14671438e86SJunchao Zhang   PetscInt         i, j, k, nrootreqs, nleafreqs, nreqs;
14771438e86SJunchao Zhang   PetscSFLink     *p, link;
14871438e86SJunchao Zhang   PetscSFDirection direction;
14971438e86SJunchao Zhang   MPI_Request     *reqs = NULL;
15071438e86SJunchao Zhang   PetscBool        match, rootdirect[2], leafdirect[2];
15171438e86SJunchao Zhang   PetscMemType     rootmtype = PetscMemTypeHost(xrootmtype) ? PETSC_MEMTYPE_HOST : PETSC_MEMTYPE_DEVICE; /* Convert to 0/1 as we will use it in subscript */
15271438e86SJunchao Zhang   PetscMemType     leafmtype = PetscMemTypeHost(xleafmtype) ? PETSC_MEMTYPE_HOST : PETSC_MEMTYPE_DEVICE;
15371438e86SJunchao Zhang   PetscMemType     rootmtype_mpi, leafmtype_mpi;   /* mtypes seen by MPI */
15471438e86SJunchao Zhang   PetscInt         rootdirect_mpi, leafdirect_mpi; /* root/leafdirect seen by MPI*/
15571438e86SJunchao Zhang 
15671438e86SJunchao Zhang   PetscFunctionBegin;
15771438e86SJunchao Zhang 
15871438e86SJunchao Zhang   /* Can we directly use root/leafdirect with the given sf, sfop and op? */
15971438e86SJunchao Zhang   for (i = PETSCSF_LOCAL; i <= PETSCSF_REMOTE; i++) {
16071438e86SJunchao Zhang     if (sfop == PETSCSF_BCAST) {
16171438e86SJunchao Zhang       rootdirect[i] = bas->rootcontig[i];                                                  /* Pack roots */
16271438e86SJunchao Zhang       leafdirect[i] = (sf->leafcontig[i] && op == MPI_REPLACE) ? PETSC_TRUE : PETSC_FALSE; /* Unpack leaves */
16371438e86SJunchao Zhang     } else if (sfop == PETSCSF_REDUCE) {
16471438e86SJunchao Zhang       leafdirect[i] = sf->leafcontig[i];                                                    /* Pack leaves */
16571438e86SJunchao Zhang       rootdirect[i] = (bas->rootcontig[i] && op == MPI_REPLACE) ? PETSC_TRUE : PETSC_FALSE; /* Unpack roots */
16671438e86SJunchao Zhang     } else {                                                                                /* PETSCSF_FETCH */
16771438e86SJunchao Zhang       rootdirect[i] = PETSC_FALSE;                                                          /* FETCH always need a separate rootbuf */
16871438e86SJunchao Zhang       leafdirect[i] = PETSC_FALSE;                                                          /* We also force allocating a separate leafbuf so that leafdata and leafupdate can share mpi requests */
16971438e86SJunchao Zhang     }
17071438e86SJunchao Zhang   }
17171438e86SJunchao Zhang 
17271438e86SJunchao Zhang   if (sf->use_gpu_aware_mpi) {
17371438e86SJunchao Zhang     rootmtype_mpi = rootmtype;
17471438e86SJunchao Zhang     leafmtype_mpi = leafmtype;
17571438e86SJunchao Zhang   } else {
17671438e86SJunchao Zhang     rootmtype_mpi = leafmtype_mpi = PETSC_MEMTYPE_HOST;
17771438e86SJunchao Zhang   }
178da81f932SPierre Jolivet   /* Will root/leafdata be directly accessed by MPI?  Without use_gpu_aware_mpi, device data is buffered on host and then passed to MPI */
17971438e86SJunchao Zhang   rootdirect_mpi = rootdirect[PETSCSF_REMOTE] && (rootmtype_mpi == rootmtype) ? 1 : 0;
18071438e86SJunchao Zhang   leafdirect_mpi = leafdirect[PETSCSF_REMOTE] && (leafmtype_mpi == leafmtype) ? 1 : 0;
18171438e86SJunchao Zhang 
18271438e86SJunchao Zhang   direction = (sfop == PETSCSF_BCAST) ? PETSCSF_ROOT2LEAF : PETSCSF_LEAF2ROOT;
18371438e86SJunchao Zhang   nrootreqs = bas->nrootreqs;
18471438e86SJunchao Zhang   nleafreqs = sf->nleafreqs;
18571438e86SJunchao Zhang 
18671438e86SJunchao Zhang   /* Look for free links in cache */
18771438e86SJunchao Zhang   for (p = &bas->avail; (link = *p); p = &link->next) {
18871438e86SJunchao Zhang     if (!link->use_nvshmem) { /* Only check with MPI links */
1899566063dSJacob Faibussowitsch       PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
19071438e86SJunchao Zhang       if (match) {
19171438e86SJunchao Zhang         /* If root/leafdata will be directly passed to MPI, test if the data used to initialized the MPI requests matches with the current.
19271438e86SJunchao Zhang            If not, free old requests. New requests will be lazily init'ed until one calls PetscSFLinkGetMPIBuffersAndRequests().
19371438e86SJunchao Zhang         */
19471438e86SJunchao Zhang         if (rootdirect_mpi && sf->persistent && link->rootreqsinited[direction][rootmtype][1] && link->rootdatadirect[direction][rootmtype] != rootdata) {
19571438e86SJunchao Zhang           reqs = link->rootreqs[direction][rootmtype][1]; /* Here, rootmtype = rootmtype_mpi */
1969371c9d4SSatish Balay           for (i = 0; i < nrootreqs; i++) {
1979371c9d4SSatish Balay             if (reqs[i] != MPI_REQUEST_NULL) PetscCallMPI(MPI_Request_free(&reqs[i]));
1989371c9d4SSatish Balay           }
19971438e86SJunchao Zhang           link->rootreqsinited[direction][rootmtype][1] = PETSC_FALSE;
20071438e86SJunchao Zhang         }
20171438e86SJunchao Zhang         if (leafdirect_mpi && sf->persistent && link->leafreqsinited[direction][leafmtype][1] && link->leafdatadirect[direction][leafmtype] != leafdata) {
20271438e86SJunchao Zhang           reqs = link->leafreqs[direction][leafmtype][1];
2039371c9d4SSatish Balay           for (i = 0; i < nleafreqs; i++) {
2049371c9d4SSatish Balay             if (reqs[i] != MPI_REQUEST_NULL) PetscCallMPI(MPI_Request_free(&reqs[i]));
2059371c9d4SSatish Balay           }
20671438e86SJunchao Zhang           link->leafreqsinited[direction][leafmtype][1] = PETSC_FALSE;
20771438e86SJunchao Zhang         }
20871438e86SJunchao Zhang         *p = link->next; /* Remove from available list */
20971438e86SJunchao Zhang         goto found;
21071438e86SJunchao Zhang       }
21171438e86SJunchao Zhang     }
21271438e86SJunchao Zhang   }
21371438e86SJunchao Zhang 
2149566063dSJacob Faibussowitsch   PetscCall(PetscNew(&link));
2159566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkSetUp_Host(sf, link, unit));
2169566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(PetscObjectComm((PetscObject)sf), &link->tag)); /* One tag per link */
21771438e86SJunchao Zhang 
21871438e86SJunchao Zhang   nreqs = (nrootreqs + nleafreqs) * 8;
2199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nreqs, &link->reqs));
22071438e86SJunchao Zhang   for (i = 0; i < nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */
22171438e86SJunchao Zhang 
222*5c0db29aSPierre Jolivet   if (nreqs)
22371438e86SJunchao Zhang     for (i = 0; i < 2; i++) {     /* Two communication directions */
22471438e86SJunchao Zhang       for (j = 0; j < 2; j++) {   /* Two memory types */
22571438e86SJunchao Zhang         for (k = 0; k < 2; k++) { /* root/leafdirect 0 or 1 */
22671438e86SJunchao Zhang           link->rootreqs[i][j][k] = link->reqs + nrootreqs * (4 * i + 2 * j + k);
22771438e86SJunchao Zhang           link->leafreqs[i][j][k] = link->reqs + nrootreqs * 8 + nleafreqs * (4 * i + 2 * j + k);
22871438e86SJunchao Zhang         }
22971438e86SJunchao Zhang       }
23071438e86SJunchao Zhang     }
23171438e86SJunchao Zhang   link->StartCommunication  = PetscSFLinkStartRequests_MPI;
23271438e86SJunchao Zhang   link->FinishCommunication = PetscSFLinkWaitRequests_MPI;
233715b587bSJunchao Zhang #if defined(PETSC_HAVE_MPIX_STREAM)
234715b587bSJunchao Zhang   if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) {
235715b587bSJunchao Zhang     link->StartCommunication  = PetscSFLinkStartEnqueue_MPIX_Stream;
236715b587bSJunchao Zhang     link->FinishCommunication = PetscSFLinkWaitEnqueue_MPIX_Stream;
237715b587bSJunchao Zhang   }
238715b587bSJunchao Zhang #endif
23971438e86SJunchao Zhang 
24071438e86SJunchao Zhang found:
24171438e86SJunchao Zhang 
24271438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
24371438e86SJunchao Zhang   if ((PetscMemTypeDevice(xrootmtype) || PetscMemTypeDevice(xleafmtype)) && !link->deviceinited) {
24471438e86SJunchao Zhang   #if defined(PETSC_HAVE_CUDA)
2459566063dSJacob Faibussowitsch     if (sf->backend == PETSCSF_BACKEND_CUDA) PetscCall(PetscSFLinkSetUp_CUDA(sf, link, unit)); /* Setup streams etc */
24671438e86SJunchao Zhang   #endif
24771438e86SJunchao Zhang   #if defined(PETSC_HAVE_HIP)
2489566063dSJacob Faibussowitsch     if (sf->backend == PETSCSF_BACKEND_HIP) PetscCall(PetscSFLinkSetUp_HIP(sf, link, unit)); /* Setup streams etc */
24971438e86SJunchao Zhang   #endif
25071438e86SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS)
2519566063dSJacob Faibussowitsch     if (sf->backend == PETSCSF_BACKEND_KOKKOS) PetscCall(PetscSFLinkSetUp_Kokkos(sf, link, unit));
25271438e86SJunchao Zhang   #endif
25371438e86SJunchao Zhang   }
25471438e86SJunchao Zhang #endif
25571438e86SJunchao Zhang 
25671438e86SJunchao Zhang   /* Allocate buffers along root/leafdata */
25771438e86SJunchao Zhang   for (i = PETSCSF_LOCAL; i <= PETSCSF_REMOTE; i++) {
25871438e86SJunchao Zhang     /* For local communication, buffers are only needed when roots and leaves have different mtypes */
25971438e86SJunchao Zhang     if (i == PETSCSF_LOCAL && rootmtype == leafmtype) continue;
26071438e86SJunchao Zhang     if (bas->rootbuflen[i]) {
26171438e86SJunchao Zhang       if (rootdirect[i]) { /* Aha, we disguise rootdata as rootbuf */
26271438e86SJunchao Zhang         link->rootbuf[i][rootmtype] = (char *)rootdata + bas->rootstart[i] * link->unitbytes;
26371438e86SJunchao Zhang       } else { /* Have to have a separate rootbuf */
26448a46eb9SPierre Jolivet         if (!link->rootbuf_alloc[i][rootmtype]) PetscCall(PetscSFMalloc(sf, rootmtype, bas->rootbuflen[i] * link->unitbytes, (void **)&link->rootbuf_alloc[i][rootmtype]));
26571438e86SJunchao Zhang         link->rootbuf[i][rootmtype] = link->rootbuf_alloc[i][rootmtype];
26671438e86SJunchao Zhang       }
26771438e86SJunchao Zhang     }
26871438e86SJunchao Zhang 
26971438e86SJunchao Zhang     if (sf->leafbuflen[i]) {
27071438e86SJunchao Zhang       if (leafdirect[i]) {
27171438e86SJunchao Zhang         link->leafbuf[i][leafmtype] = (char *)leafdata + sf->leafstart[i] * link->unitbytes;
27271438e86SJunchao Zhang       } else {
27348a46eb9SPierre Jolivet         if (!link->leafbuf_alloc[i][leafmtype]) PetscCall(PetscSFMalloc(sf, leafmtype, sf->leafbuflen[i] * link->unitbytes, (void **)&link->leafbuf_alloc[i][leafmtype]));
27471438e86SJunchao Zhang         link->leafbuf[i][leafmtype] = link->leafbuf_alloc[i][leafmtype];
27571438e86SJunchao Zhang       }
27671438e86SJunchao Zhang     }
27771438e86SJunchao Zhang   }
27871438e86SJunchao Zhang 
27971438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
28071438e86SJunchao Zhang   /* Allocate buffers on host for buffering data on device in cast not use_gpu_aware_mpi */
28171438e86SJunchao Zhang   if (PetscMemTypeDevice(rootmtype) && PetscMemTypeHost(rootmtype_mpi)) {
28248a46eb9SPierre Jolivet     if (!link->rootbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST]) PetscCall(PetscMalloc(bas->rootbuflen[PETSCSF_REMOTE] * link->unitbytes, &link->rootbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST]));
28371438e86SJunchao Zhang     link->rootbuf[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST] = link->rootbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST];
28471438e86SJunchao Zhang   }
28571438e86SJunchao Zhang   if (PetscMemTypeDevice(leafmtype) && PetscMemTypeHost(leafmtype_mpi)) {
28648a46eb9SPierre Jolivet     if (!link->leafbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST]) PetscCall(PetscMalloc(sf->leafbuflen[PETSCSF_REMOTE] * link->unitbytes, &link->leafbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST]));
28771438e86SJunchao Zhang     link->leafbuf[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST] = link->leafbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST];
28871438e86SJunchao Zhang   }
28971438e86SJunchao Zhang #endif
29071438e86SJunchao Zhang 
29171438e86SJunchao Zhang   /* Set `current` state of the link. They may change between different SF invocations with the same link */
29271438e86SJunchao Zhang   if (sf->persistent) { /* If data is directly passed to MPI and inits MPI requests, record the data for comparison on future invocations */
29371438e86SJunchao Zhang     if (rootdirect_mpi) link->rootdatadirect[direction][rootmtype] = rootdata;
29471438e86SJunchao Zhang     if (leafdirect_mpi) link->leafdatadirect[direction][leafmtype] = leafdata;
29571438e86SJunchao Zhang   }
29671438e86SJunchao Zhang 
29771438e86SJunchao Zhang   link->rootdata = rootdata; /* root/leafdata are keys to look up links in PetscSFXxxEnd */
29871438e86SJunchao Zhang   link->leafdata = leafdata;
29971438e86SJunchao Zhang   for (i = PETSCSF_LOCAL; i <= PETSCSF_REMOTE; i++) {
30071438e86SJunchao Zhang     link->rootdirect[i] = rootdirect[i];
30171438e86SJunchao Zhang     link->leafdirect[i] = leafdirect[i];
30271438e86SJunchao Zhang   }
30371438e86SJunchao Zhang   link->rootdirect_mpi = rootdirect_mpi;
30471438e86SJunchao Zhang   link->leafdirect_mpi = leafdirect_mpi;
30571438e86SJunchao Zhang   link->rootmtype      = rootmtype;
30671438e86SJunchao Zhang   link->leafmtype      = leafmtype;
30771438e86SJunchao Zhang   link->rootmtype_mpi  = rootmtype_mpi;
30871438e86SJunchao Zhang   link->leafmtype_mpi  = leafmtype_mpi;
30971438e86SJunchao Zhang 
31071438e86SJunchao Zhang   link->next = bas->inuse;
31171438e86SJunchao Zhang   bas->inuse = link;
31271438e86SJunchao Zhang   *mylink    = link;
3133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31471438e86SJunchao Zhang }
315