171438e86SJunchao Zhang /* Mainly for MPI_Isend in SFBASIC. Once SFNEIGHBOR, SFALLGHATERV etc have a persistent version, 271438e86SJunchao Zhang we can also do abstractions like Prepare/StartCommunication. 371438e86SJunchao Zhang */ 471438e86SJunchao Zhang 571438e86SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h> 671438e86SJunchao Zhang 771438e86SJunchao Zhang /* Start MPI requests. If use non-GPU aware MPI, we might need to copy data from device buf to host buf */ 8d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFLinkStartRequests_MPI(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 9d71ae5a4SJacob Faibussowitsch { 1071438e86SJunchao Zhang PetscMPIInt nreqs; 1171438e86SJunchao Zhang MPI_Request *reqs = NULL; 1271438e86SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 1371438e86SJunchao Zhang PetscInt buflen; 1471438e86SJunchao Zhang 1571438e86SJunchao Zhang PetscFunctionBegin; 1671438e86SJunchao Zhang buflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE]; 1771438e86SJunchao Zhang if (buflen) { 1871438e86SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 1971438e86SJunchao Zhang nreqs = sf->nleafreqs; 209566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &reqs)); 2171438e86SJunchao Zhang } else { /* leaf to root */ 2271438e86SJunchao Zhang nreqs = bas->nrootreqs; 239566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &reqs, NULL)); 2471438e86SJunchao Zhang } 259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Startall_irecv(buflen, link->unit, nreqs, reqs)); 2671438e86SJunchao Zhang } 2771438e86SJunchao Zhang 2871438e86SJunchao Zhang buflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE]; 2971438e86SJunchao Zhang if (buflen) { 3071438e86SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 3171438e86SJunchao Zhang nreqs = bas->nrootreqs; 329566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /*device2host before sending */)); 339566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &reqs, NULL)); 3471438e86SJunchao Zhang } else { /* leaf to root */ 3571438e86SJunchao Zhang nreqs = sf->nleafreqs; 369566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE)); 379566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &reqs)); 3871438e86SJunchao Zhang } 399566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, direction)); 409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Startall_isend(buflen, link->unit, nreqs, reqs)); 4171438e86SJunchao Zhang } 423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4371438e86SJunchao Zhang } 4471438e86SJunchao Zhang 45d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFLinkWaitRequests_MPI(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 46d71ae5a4SJacob Faibussowitsch { 4771438e86SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 4871438e86SJunchao Zhang const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; 4971438e86SJunchao Zhang const PetscInt rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi; 5071438e86SJunchao Zhang 5171438e86SJunchao Zhang PetscFunctionBegin; 529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE)); 539566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE)); 5471438e86SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 559566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_FALSE /* host2device after recving */)); 5671438e86SJunchao Zhang } else { 579566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_FALSE)); 5871438e86SJunchao Zhang } 593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6071438e86SJunchao Zhang } 6171438e86SJunchao Zhang 62*715b587bSJunchao Zhang #if defined(PETSC_HAVE_MPIX_STREAM) 63*715b587bSJunchao Zhang // issue MPIX_Isend/Irecv_enqueue() 64*715b587bSJunchao Zhang static PetscErrorCode PetscSFLinkStartEnqueue_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 65*715b587bSJunchao Zhang { 66*715b587bSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 67*715b587bSJunchao Zhang PetscInt i, j, cnt, nrootranks, ndrootranks, nleafranks, ndleafranks; 68*715b587bSJunchao Zhang const PetscInt *rootoffset, *leafoffset; 69*715b587bSJunchao Zhang MPI_Aint disp; 70*715b587bSJunchao Zhang MPI_Comm stream_comm = sf->stream_comm; 71*715b587bSJunchao Zhang MPI_Datatype unit = link->unit; 72*715b587bSJunchao Zhang const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */ 73*715b587bSJunchao Zhang const PetscInt rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi; 74*715b587bSJunchao Zhang 75*715b587bSJunchao Zhang PetscFunctionBegin; 76*715b587bSJunchao Zhang if (bas->rootbuflen[PETSCSF_REMOTE]) { 77*715b587bSJunchao Zhang PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL)); 78*715b587bSJunchao Zhang if (direction == PETSCSF_LEAF2ROOT) { 79*715b587bSJunchao Zhang for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) { 80*715b587bSJunchao Zhang disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes; 81*715b587bSJunchao Zhang cnt = rootoffset[i + 1] - rootoffset[i]; 82*715b587bSJunchao Zhang PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j)); 83*715b587bSJunchao Zhang } 84*715b587bSJunchao Zhang } else { // PETSCSF_ROOT2LEAF 85*715b587bSJunchao Zhang for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) { 86*715b587bSJunchao Zhang disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes; 87*715b587bSJunchao Zhang cnt = rootoffset[i + 1] - rootoffset[i]; 88*715b587bSJunchao Zhang // no need to sync the gpu stream! 89*715b587bSJunchao Zhang PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j)); 90*715b587bSJunchao Zhang } 91*715b587bSJunchao Zhang } 92*715b587bSJunchao Zhang } 93*715b587bSJunchao Zhang 94*715b587bSJunchao Zhang if (sf->leafbuflen[PETSCSF_REMOTE]) { 95*715b587bSJunchao Zhang PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL)); 96*715b587bSJunchao Zhang if (direction == PETSCSF_LEAF2ROOT) { 97*715b587bSJunchao Zhang for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) { 98*715b587bSJunchao Zhang disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes; 99*715b587bSJunchao Zhang cnt = leafoffset[i + 1] - leafoffset[i]; 100*715b587bSJunchao Zhang // no need to sync the gpu stream! 101*715b587bSJunchao Zhang PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j)); 102*715b587bSJunchao Zhang } 103*715b587bSJunchao Zhang } else { // PETSCSF_ROOT2LEAF 104*715b587bSJunchao Zhang for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) { 105*715b587bSJunchao Zhang disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes; 106*715b587bSJunchao Zhang cnt = leafoffset[i + 1] - leafoffset[i]; 107*715b587bSJunchao Zhang PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j)); 108*715b587bSJunchao Zhang } 109*715b587bSJunchao Zhang } 110*715b587bSJunchao Zhang } 111*715b587bSJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 112*715b587bSJunchao Zhang } 113*715b587bSJunchao Zhang 114*715b587bSJunchao Zhang static PetscErrorCode PetscSFLinkWaitEnqueue_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 115*715b587bSJunchao Zhang { 116*715b587bSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 117*715b587bSJunchao Zhang const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; 118*715b587bSJunchao Zhang const PetscInt rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi; 119*715b587bSJunchao Zhang 120*715b587bSJunchao Zhang PetscFunctionBegin; 121*715b587bSJunchao Zhang PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE)); 122*715b587bSJunchao Zhang PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE)); 123*715b587bSJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 124*715b587bSJunchao Zhang } 125*715b587bSJunchao Zhang #endif 126*715b587bSJunchao Zhang 12771438e86SJunchao Zhang /* 12871438e86SJunchao Zhang The routine Creates a communication link for the given operation. It first looks up its link cache. If 12971438e86SJunchao Zhang there is a free & suitable one, it uses it. Otherwise it creates a new one. 13071438e86SJunchao Zhang 13171438e86SJunchao Zhang A link contains buffers and MPI requests for send/recv. It also contains pack/unpack routines to pack/unpack 13271438e86SJunchao Zhang root/leafdata to/from these buffers. Buffers are allocated at our discretion. When we find root/leafata 13371438e86SJunchao Zhang can be directly passed to MPI, we won't allocate them. Even we allocate buffers, we only allocate 13471438e86SJunchao Zhang those that are needed by the given `sfop` and `op`, in other words, we do lazy memory-allocation. 13571438e86SJunchao Zhang 13671438e86SJunchao Zhang The routine also allocates buffers on CPU when one does not use gpu-aware MPI but data is on GPU. 13771438e86SJunchao Zhang 13871438e86SJunchao Zhang In SFBasic, MPI requests are persistent. They are init'ed until we try to get requests from a link. 13971438e86SJunchao Zhang 14071438e86SJunchao Zhang The routine is shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs and 14171438e86SJunchao Zhang need pack/unpack data. 14271438e86SJunchao Zhang */ 143d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFLinkCreate_MPI(PetscSF sf, MPI_Datatype unit, PetscMemType xrootmtype, const void *rootdata, PetscMemType xleafmtype, const void *leafdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *mylink) 144d71ae5a4SJacob Faibussowitsch { 14571438e86SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 14671438e86SJunchao Zhang PetscInt i, j, k, nrootreqs, nleafreqs, nreqs; 14771438e86SJunchao Zhang PetscSFLink *p, link; 14871438e86SJunchao Zhang PetscSFDirection direction; 14971438e86SJunchao Zhang MPI_Request *reqs = NULL; 15071438e86SJunchao Zhang PetscBool match, rootdirect[2], leafdirect[2]; 15171438e86SJunchao Zhang PetscMemType rootmtype = PetscMemTypeHost(xrootmtype) ? PETSC_MEMTYPE_HOST : PETSC_MEMTYPE_DEVICE; /* Convert to 0/1 as we will use it in subscript */ 15271438e86SJunchao Zhang PetscMemType leafmtype = PetscMemTypeHost(xleafmtype) ? PETSC_MEMTYPE_HOST : PETSC_MEMTYPE_DEVICE; 15371438e86SJunchao Zhang PetscMemType rootmtype_mpi, leafmtype_mpi; /* mtypes seen by MPI */ 15471438e86SJunchao Zhang PetscInt rootdirect_mpi, leafdirect_mpi; /* root/leafdirect seen by MPI*/ 15571438e86SJunchao Zhang 15671438e86SJunchao Zhang PetscFunctionBegin; 15771438e86SJunchao Zhang 15871438e86SJunchao Zhang /* Can we directly use root/leafdirect with the given sf, sfop and op? */ 15971438e86SJunchao Zhang for (i = PETSCSF_LOCAL; i <= PETSCSF_REMOTE; i++) { 16071438e86SJunchao Zhang if (sfop == PETSCSF_BCAST) { 16171438e86SJunchao Zhang rootdirect[i] = bas->rootcontig[i]; /* Pack roots */ 16271438e86SJunchao Zhang leafdirect[i] = (sf->leafcontig[i] && op == MPI_REPLACE) ? PETSC_TRUE : PETSC_FALSE; /* Unpack leaves */ 16371438e86SJunchao Zhang } else if (sfop == PETSCSF_REDUCE) { 16471438e86SJunchao Zhang leafdirect[i] = sf->leafcontig[i]; /* Pack leaves */ 16571438e86SJunchao Zhang rootdirect[i] = (bas->rootcontig[i] && op == MPI_REPLACE) ? PETSC_TRUE : PETSC_FALSE; /* Unpack roots */ 16671438e86SJunchao Zhang } else { /* PETSCSF_FETCH */ 16771438e86SJunchao Zhang rootdirect[i] = PETSC_FALSE; /* FETCH always need a separate rootbuf */ 16871438e86SJunchao Zhang leafdirect[i] = PETSC_FALSE; /* We also force allocating a separate leafbuf so that leafdata and leafupdate can share mpi requests */ 16971438e86SJunchao Zhang } 17071438e86SJunchao Zhang } 17171438e86SJunchao Zhang 17271438e86SJunchao Zhang if (sf->use_gpu_aware_mpi) { 17371438e86SJunchao Zhang rootmtype_mpi = rootmtype; 17471438e86SJunchao Zhang leafmtype_mpi = leafmtype; 17571438e86SJunchao Zhang } else { 17671438e86SJunchao Zhang rootmtype_mpi = leafmtype_mpi = PETSC_MEMTYPE_HOST; 17771438e86SJunchao Zhang } 178da81f932SPierre Jolivet /* Will root/leafdata be directly accessed by MPI? Without use_gpu_aware_mpi, device data is buffered on host and then passed to MPI */ 17971438e86SJunchao Zhang rootdirect_mpi = rootdirect[PETSCSF_REMOTE] && (rootmtype_mpi == rootmtype) ? 1 : 0; 18071438e86SJunchao Zhang leafdirect_mpi = leafdirect[PETSCSF_REMOTE] && (leafmtype_mpi == leafmtype) ? 1 : 0; 18171438e86SJunchao Zhang 18271438e86SJunchao Zhang direction = (sfop == PETSCSF_BCAST) ? PETSCSF_ROOT2LEAF : PETSCSF_LEAF2ROOT; 18371438e86SJunchao Zhang nrootreqs = bas->nrootreqs; 18471438e86SJunchao Zhang nleafreqs = sf->nleafreqs; 18571438e86SJunchao Zhang 18671438e86SJunchao Zhang /* Look for free links in cache */ 18771438e86SJunchao Zhang for (p = &bas->avail; (link = *p); p = &link->next) { 18871438e86SJunchao Zhang if (!link->use_nvshmem) { /* Only check with MPI links */ 1899566063dSJacob Faibussowitsch PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match)); 19071438e86SJunchao Zhang if (match) { 19171438e86SJunchao Zhang /* If root/leafdata will be directly passed to MPI, test if the data used to initialized the MPI requests matches with the current. 19271438e86SJunchao Zhang If not, free old requests. New requests will be lazily init'ed until one calls PetscSFLinkGetMPIBuffersAndRequests(). 19371438e86SJunchao Zhang */ 19471438e86SJunchao Zhang if (rootdirect_mpi && sf->persistent && link->rootreqsinited[direction][rootmtype][1] && link->rootdatadirect[direction][rootmtype] != rootdata) { 19571438e86SJunchao Zhang reqs = link->rootreqs[direction][rootmtype][1]; /* Here, rootmtype = rootmtype_mpi */ 1969371c9d4SSatish Balay for (i = 0; i < nrootreqs; i++) { 1979371c9d4SSatish Balay if (reqs[i] != MPI_REQUEST_NULL) PetscCallMPI(MPI_Request_free(&reqs[i])); 1989371c9d4SSatish Balay } 19971438e86SJunchao Zhang link->rootreqsinited[direction][rootmtype][1] = PETSC_FALSE; 20071438e86SJunchao Zhang } 20171438e86SJunchao Zhang if (leafdirect_mpi && sf->persistent && link->leafreqsinited[direction][leafmtype][1] && link->leafdatadirect[direction][leafmtype] != leafdata) { 20271438e86SJunchao Zhang reqs = link->leafreqs[direction][leafmtype][1]; 2039371c9d4SSatish Balay for (i = 0; i < nleafreqs; i++) { 2049371c9d4SSatish Balay if (reqs[i] != MPI_REQUEST_NULL) PetscCallMPI(MPI_Request_free(&reqs[i])); 2059371c9d4SSatish Balay } 20671438e86SJunchao Zhang link->leafreqsinited[direction][leafmtype][1] = PETSC_FALSE; 20771438e86SJunchao Zhang } 20871438e86SJunchao Zhang *p = link->next; /* Remove from available list */ 20971438e86SJunchao Zhang goto found; 21071438e86SJunchao Zhang } 21171438e86SJunchao Zhang } 21271438e86SJunchao Zhang } 21371438e86SJunchao Zhang 2149566063dSJacob Faibussowitsch PetscCall(PetscNew(&link)); 2159566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSetUp_Host(sf, link, unit)); 2169566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(PetscObjectComm((PetscObject)sf), &link->tag)); /* One tag per link */ 21771438e86SJunchao Zhang 21871438e86SJunchao Zhang nreqs = (nrootreqs + nleafreqs) * 8; 2199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nreqs, &link->reqs)); 22071438e86SJunchao Zhang for (i = 0; i < nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */ 22171438e86SJunchao Zhang 22271438e86SJunchao Zhang for (i = 0; i < 2; i++) { /* Two communication directions */ 22371438e86SJunchao Zhang for (j = 0; j < 2; j++) { /* Two memory types */ 22471438e86SJunchao Zhang for (k = 0; k < 2; k++) { /* root/leafdirect 0 or 1 */ 22571438e86SJunchao Zhang link->rootreqs[i][j][k] = link->reqs + nrootreqs * (4 * i + 2 * j + k); 22671438e86SJunchao Zhang link->leafreqs[i][j][k] = link->reqs + nrootreqs * 8 + nleafreqs * (4 * i + 2 * j + k); 22771438e86SJunchao Zhang } 22871438e86SJunchao Zhang } 22971438e86SJunchao Zhang } 23071438e86SJunchao Zhang link->StartCommunication = PetscSFLinkStartRequests_MPI; 23171438e86SJunchao Zhang link->FinishCommunication = PetscSFLinkWaitRequests_MPI; 232*715b587bSJunchao Zhang #if defined(PETSC_HAVE_MPIX_STREAM) 233*715b587bSJunchao Zhang if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) { 234*715b587bSJunchao Zhang link->StartCommunication = PetscSFLinkStartEnqueue_MPIX_Stream; 235*715b587bSJunchao Zhang link->FinishCommunication = PetscSFLinkWaitEnqueue_MPIX_Stream; 236*715b587bSJunchao Zhang } 237*715b587bSJunchao Zhang #endif 23871438e86SJunchao Zhang 23971438e86SJunchao Zhang found: 24071438e86SJunchao Zhang 24171438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 24271438e86SJunchao Zhang if ((PetscMemTypeDevice(xrootmtype) || PetscMemTypeDevice(xleafmtype)) && !link->deviceinited) { 24371438e86SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 2449566063dSJacob Faibussowitsch if (sf->backend == PETSCSF_BACKEND_CUDA) PetscCall(PetscSFLinkSetUp_CUDA(sf, link, unit)); /* Setup streams etc */ 24571438e86SJunchao Zhang #endif 24671438e86SJunchao Zhang #if defined(PETSC_HAVE_HIP) 2479566063dSJacob Faibussowitsch if (sf->backend == PETSCSF_BACKEND_HIP) PetscCall(PetscSFLinkSetUp_HIP(sf, link, unit)); /* Setup streams etc */ 24871438e86SJunchao Zhang #endif 24971438e86SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 2509566063dSJacob Faibussowitsch if (sf->backend == PETSCSF_BACKEND_KOKKOS) PetscCall(PetscSFLinkSetUp_Kokkos(sf, link, unit)); 25171438e86SJunchao Zhang #endif 25271438e86SJunchao Zhang } 25371438e86SJunchao Zhang #endif 25471438e86SJunchao Zhang 25571438e86SJunchao Zhang /* Allocate buffers along root/leafdata */ 25671438e86SJunchao Zhang for (i = PETSCSF_LOCAL; i <= PETSCSF_REMOTE; i++) { 25771438e86SJunchao Zhang /* For local communication, buffers are only needed when roots and leaves have different mtypes */ 25871438e86SJunchao Zhang if (i == PETSCSF_LOCAL && rootmtype == leafmtype) continue; 25971438e86SJunchao Zhang if (bas->rootbuflen[i]) { 26071438e86SJunchao Zhang if (rootdirect[i]) { /* Aha, we disguise rootdata as rootbuf */ 26171438e86SJunchao Zhang link->rootbuf[i][rootmtype] = (char *)rootdata + bas->rootstart[i] * link->unitbytes; 26271438e86SJunchao Zhang } else { /* Have to have a separate rootbuf */ 26348a46eb9SPierre Jolivet if (!link->rootbuf_alloc[i][rootmtype]) PetscCall(PetscSFMalloc(sf, rootmtype, bas->rootbuflen[i] * link->unitbytes, (void **)&link->rootbuf_alloc[i][rootmtype])); 26471438e86SJunchao Zhang link->rootbuf[i][rootmtype] = link->rootbuf_alloc[i][rootmtype]; 26571438e86SJunchao Zhang } 26671438e86SJunchao Zhang } 26771438e86SJunchao Zhang 26871438e86SJunchao Zhang if (sf->leafbuflen[i]) { 26971438e86SJunchao Zhang if (leafdirect[i]) { 27071438e86SJunchao Zhang link->leafbuf[i][leafmtype] = (char *)leafdata + sf->leafstart[i] * link->unitbytes; 27171438e86SJunchao Zhang } else { 27248a46eb9SPierre Jolivet if (!link->leafbuf_alloc[i][leafmtype]) PetscCall(PetscSFMalloc(sf, leafmtype, sf->leafbuflen[i] * link->unitbytes, (void **)&link->leafbuf_alloc[i][leafmtype])); 27371438e86SJunchao Zhang link->leafbuf[i][leafmtype] = link->leafbuf_alloc[i][leafmtype]; 27471438e86SJunchao Zhang } 27571438e86SJunchao Zhang } 27671438e86SJunchao Zhang } 27771438e86SJunchao Zhang 27871438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 27971438e86SJunchao Zhang /* Allocate buffers on host for buffering data on device in cast not use_gpu_aware_mpi */ 28071438e86SJunchao Zhang if (PetscMemTypeDevice(rootmtype) && PetscMemTypeHost(rootmtype_mpi)) { 28148a46eb9SPierre Jolivet if (!link->rootbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST]) PetscCall(PetscMalloc(bas->rootbuflen[PETSCSF_REMOTE] * link->unitbytes, &link->rootbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST])); 28271438e86SJunchao Zhang link->rootbuf[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST] = link->rootbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST]; 28371438e86SJunchao Zhang } 28471438e86SJunchao Zhang if (PetscMemTypeDevice(leafmtype) && PetscMemTypeHost(leafmtype_mpi)) { 28548a46eb9SPierre Jolivet if (!link->leafbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST]) PetscCall(PetscMalloc(sf->leafbuflen[PETSCSF_REMOTE] * link->unitbytes, &link->leafbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST])); 28671438e86SJunchao Zhang link->leafbuf[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST] = link->leafbuf_alloc[PETSCSF_REMOTE][PETSC_MEMTYPE_HOST]; 28771438e86SJunchao Zhang } 28871438e86SJunchao Zhang #endif 28971438e86SJunchao Zhang 29071438e86SJunchao Zhang /* Set `current` state of the link. They may change between different SF invocations with the same link */ 29171438e86SJunchao Zhang if (sf->persistent) { /* If data is directly passed to MPI and inits MPI requests, record the data for comparison on future invocations */ 29271438e86SJunchao Zhang if (rootdirect_mpi) link->rootdatadirect[direction][rootmtype] = rootdata; 29371438e86SJunchao Zhang if (leafdirect_mpi) link->leafdatadirect[direction][leafmtype] = leafdata; 29471438e86SJunchao Zhang } 29571438e86SJunchao Zhang 29671438e86SJunchao Zhang link->rootdata = rootdata; /* root/leafdata are keys to look up links in PetscSFXxxEnd */ 29771438e86SJunchao Zhang link->leafdata = leafdata; 29871438e86SJunchao Zhang for (i = PETSCSF_LOCAL; i <= PETSCSF_REMOTE; i++) { 29971438e86SJunchao Zhang link->rootdirect[i] = rootdirect[i]; 30071438e86SJunchao Zhang link->leafdirect[i] = leafdirect[i]; 30171438e86SJunchao Zhang } 30271438e86SJunchao Zhang link->rootdirect_mpi = rootdirect_mpi; 30371438e86SJunchao Zhang link->leafdirect_mpi = leafdirect_mpi; 30471438e86SJunchao Zhang link->rootmtype = rootmtype; 30571438e86SJunchao Zhang link->leafmtype = leafmtype; 30671438e86SJunchao Zhang link->rootmtype_mpi = rootmtype_mpi; 30771438e86SJunchao Zhang link->leafmtype_mpi = leafmtype_mpi; 30871438e86SJunchao Zhang 30971438e86SJunchao Zhang link->next = bas->inuse; 31071438e86SJunchao Zhang bas->inuse = link; 31171438e86SJunchao Zhang *mylink = link; 3123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31371438e86SJunchao Zhang } 314