1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang 3eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gatherv(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op); 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang /*===================================================================================*/ 6eb02082bSJunchao Zhang /* Internal routines for PetscSFPack */ 7dd5b3ca6SJunchao Zhang /*===================================================================================*/ 8eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGet_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFPack *mylink) 9dd5b3ca6SJunchao Zhang { 10dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 11eb02082bSJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 12eb02082bSJunchao Zhang PetscSFPack link,*p; 13eb02082bSJunchao Zhang PetscBool match; 14eb02082bSJunchao Zhang PetscInt i,j; 15dd5b3ca6SJunchao Zhang 16dd5b3ca6SJunchao Zhang PetscFunctionBegin; 17eb02082bSJunchao Zhang ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);CHKERRQ(ierr); 18dd5b3ca6SJunchao Zhang /* Look for types in cache */ 19eb02082bSJunchao Zhang for (p=&dat->avail; (link=*p); p=&link->next) { 20dd5b3ca6SJunchao Zhang ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 21dd5b3ca6SJunchao Zhang if (match) { 22dd5b3ca6SJunchao Zhang *p = link->next; /* Remove from available list */ 23dd5b3ca6SJunchao Zhang goto found; 24dd5b3ca6SJunchao Zhang } 25dd5b3ca6SJunchao Zhang } 26dd5b3ca6SJunchao Zhang 27dd5b3ca6SJunchao Zhang ierr = PetscNew(&link);CHKERRQ(ierr); 28eb02082bSJunchao Zhang ierr = PetscSFPackSetUp_Host(sf,link,unit);CHKERRQ(ierr); 29*855db38dSJunchao Zhang 30*855db38dSJunchao Zhang link->rootbuflen = sf->nroots; 31*855db38dSJunchao Zhang link->leafbuflen = sf->nleaves; 32eb02082bSJunchao Zhang link->nrootreqs = 1; 33eb02082bSJunchao Zhang link->nleafreqs = 0; 34eb02082bSJunchao Zhang ierr = PetscMalloc1(4,&link->reqs);CHKERRQ(ierr); /* 4 = (nrootreqs+nleafreqs)*4 */ 35eb02082bSJunchao Zhang for (i=0; i<4; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */ 36dd5b3ca6SJunchao Zhang 37eb02082bSJunchao Zhang for (i=0; i<2; i++) { 38eb02082bSJunchao Zhang for (j=0; j<2; j++) { 39eb02082bSJunchao Zhang link->rootreqs[i][j] = link->reqs + (2*i+j); 40eb02082bSJunchao Zhang link->leafreqs[i][j] = NULL; /* leaf requests are not needed. Make it NULL to segfault accident use */ 41eb02082bSJunchao Zhang } 42eb02082bSJunchao Zhang } 43eb02082bSJunchao Zhang 44eb02082bSJunchao Zhang /* DO NOT allocate link->rootbuf[]/leafleaf[]. We use lazy allocation since these buffers are likely not needed */ 45dd5b3ca6SJunchao Zhang found: 46eb02082bSJunchao Zhang link->rootmtype = rootmtype; 47eb02082bSJunchao Zhang link->leafmtype = leafmtype; 48eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 49*855db38dSJunchao Zhang ierr = PetscSFPackSetUp_Device(sf,link,unit);CHKERRQ(ierr); 50eb02082bSJunchao Zhang #endif 51eb02082bSJunchao Zhang link->rkey = rootdata; 52eb02082bSJunchao Zhang link->lkey = leafdata; 53dd5b3ca6SJunchao Zhang link->next = dat->inuse; 54eb02082bSJunchao Zhang dat->inuse = link; 55dd5b3ca6SJunchao Zhang 56dd5b3ca6SJunchao Zhang *mylink = link; 57dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 58dd5b3ca6SJunchao Zhang } 59dd5b3ca6SJunchao Zhang 60dd5b3ca6SJunchao Zhang /*===================================================================================*/ 61dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 62dd5b3ca6SJunchao Zhang /*===================================================================================*/ 63dd5b3ca6SJunchao Zhang 64dd5b3ca6SJunchao Zhang /* PetscSFGetGraph is non-collective. An implementation should not have collective calls */ 65dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetGraph_Allgatherv(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 66dd5b3ca6SJunchao Zhang { 67dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 68dd5b3ca6SJunchao Zhang PetscInt i,j,k; 69dd5b3ca6SJunchao Zhang const PetscInt *range; 70dd5b3ca6SJunchao Zhang PetscMPIInt size; 71dd5b3ca6SJunchao Zhang 72dd5b3ca6SJunchao Zhang PetscFunctionBegin; 73dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 74dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 75dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 76dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous leaves */ 77dd5b3ca6SJunchao Zhang if (iremote) { 78dd5b3ca6SJunchao Zhang if (!sf->remote && sf->nleaves) { /* The && sf->nleaves makes sfgatherv able to inherit this routine */ 79dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr); 80dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr); 81dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 82dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) { 83dd5b3ca6SJunchao Zhang for (j=range[i],k=0; j<range[i+1]; j++,k++) { 84dd5b3ca6SJunchao Zhang sf->remote[j].rank = i; 85dd5b3ca6SJunchao Zhang sf->remote[j].index = k; 86dd5b3ca6SJunchao Zhang } 87dd5b3ca6SJunchao Zhang } 88dd5b3ca6SJunchao Zhang } 89dd5b3ca6SJunchao Zhang *iremote = sf->remote; 90dd5b3ca6SJunchao Zhang } 91dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 92dd5b3ca6SJunchao Zhang } 93dd5b3ca6SJunchao Zhang 94dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Allgatherv(PetscSF sf) 95dd5b3ca6SJunchao Zhang { 96dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 97dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 98dd5b3ca6SJunchao Zhang PetscMPIInt size; 99dd5b3ca6SJunchao Zhang PetscInt i; 100dd5b3ca6SJunchao Zhang const PetscInt *range; 101dd5b3ca6SJunchao Zhang 102dd5b3ca6SJunchao Zhang PetscFunctionBegin; 103dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 104dd5b3ca6SJunchao Zhang if (sf->nleaves) { /* This if (sf->nleaves) test makes sfgatherv able to inherit this routine */ 105dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(size,&dat->recvcounts);CHKERRQ(ierr); 106dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(size,&dat->displs);CHKERRQ(ierr); 107dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr); 108dd5b3ca6SJunchao Zhang 109dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) { 110dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(range[i],&dat->displs[i]);CHKERRQ(ierr); 111dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(range[i+1]-range[i],&dat->recvcounts[i]);CHKERRQ(ierr); 112dd5b3ca6SJunchao Zhang } 113dd5b3ca6SJunchao Zhang } 114dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 115dd5b3ca6SJunchao Zhang } 116dd5b3ca6SJunchao Zhang 117dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Allgatherv(PetscSF sf) 118dd5b3ca6SJunchao Zhang { 119dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 120eb02082bSJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 121dd5b3ca6SJunchao Zhang 122dd5b3ca6SJunchao Zhang PetscFunctionBegin; 123dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->iranks);CHKERRQ(ierr); 124dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->ioffset);CHKERRQ(ierr); 125dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->irootloc);CHKERRQ(ierr); 126dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->recvcounts);CHKERRQ(ierr); 127dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->displs);CHKERRQ(ierr); 128dd5b3ca6SJunchao Zhang if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 12964f49babSJed Brown ierr = PetscSFPackDestroyAvailable(&dat->avail);CHKERRQ(ierr); 130dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 131dd5b3ca6SJunchao Zhang } 132dd5b3ca6SJunchao Zhang 133dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Allgatherv(PetscSF sf) 134dd5b3ca6SJunchao Zhang { 135dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 136dd5b3ca6SJunchao Zhang 137dd5b3ca6SJunchao Zhang PetscFunctionBegin; 138dd5b3ca6SJunchao Zhang ierr = PetscSFReset_Allgatherv(sf);CHKERRQ(ierr); 139dd5b3ca6SJunchao Zhang ierr = PetscFree(sf->data);CHKERRQ(ierr); 140dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 141dd5b3ca6SJunchao Zhang } 142dd5b3ca6SJunchao Zhang 143*855db38dSJunchao Zhang /* 144*855db38dSJunchao Zhang Prepare the rootbuf, leafbuf etc used by MPI in PetscSFBcastAndOpBegin. 145*855db38dSJunchao Zhang 146*855db38dSJunchao Zhang Input Arguments: 147*855db38dSJunchao Zhang + sf - the start forest 148*855db38dSJunchao Zhang . link - the link PetscSFBcastAndOp is currently using 149*855db38dSJunchao Zhang - op - the reduction op 150*855db38dSJunchao Zhang 151*855db38dSJunchao Zhang Output Arguments: 152*855db38dSJunchao Zhang +rootmtype_mpi - memtype of rootbuf_mpi 153*855db38dSJunchao Zhang .rootbuf_mpi - root buffer used by MPI in the following MPI call 154*855db38dSJunchao Zhang .leafmtype_mpi - memtype of leafbuf_mpi 155*855db38dSJunchao Zhang -leafbuf_mpi - leaf buffer used by MPI in the following MPI call 156*855db38dSJunchao Zhang 157*855db38dSJunchao Zhang Notes: 158*855db38dSJunchao Zhang This function was created because things became complex when rootdata or leafdata is on device, but the user does not want to use GPU-aware MPI. 159*855db38dSJunchao Zhang We have to copy data from device to host before doing MPI. This function encapsulates all varieties and is reused by Allgatherv & Allgahter. 160*855db38dSJunchao Zhang */ 161*855db38dSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastPrepareMPIBuffers_Allgatherv(PetscSF sf,PetscSFPack link,MPI_Op op,PetscMemType *rootmtype_mpi,const void **rootbuf_mpi,PetscMemType *leafmtype_mpi, void **leafbuf_mpi) 162*855db38dSJunchao Zhang { 163*855db38dSJunchao Zhang PetscErrorCode ierr; 164*855db38dSJunchao Zhang 165*855db38dSJunchao Zhang PetscFunctionBegin; 166*855db38dSJunchao Zhang /* If rootdata is on device but no gpu-aware mpi, we need to copy rootdata to rootbuf on host before bcast; otherwise we directly bcast from leafdata */ 167*855db38dSJunchao Zhang if (link->rootmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { 168*855db38dSJunchao Zhang if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);} 169*855db38dSJunchao Zhang ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_HOST,PETSC_MEMTYPE_DEVICE,link->rootbuf[PETSC_MEMTYPE_HOST],link->rkey,link->rootbuflen*link->unitbytes);CHKERRQ(ierr); 170*855db38dSJunchao Zhang *rootbuf_mpi = link->rootbuf[PETSC_MEMTYPE_HOST]; 171*855db38dSJunchao Zhang *rootmtype_mpi = PETSC_MEMTYPE_HOST; 172*855db38dSJunchao Zhang } else { 173*855db38dSJunchao Zhang *rootbuf_mpi = link->rkey; 174*855db38dSJunchao Zhang *rootmtype_mpi = link->rootmtype; 175*855db38dSJunchao Zhang } 176*855db38dSJunchao Zhang 177*855db38dSJunchao Zhang if (link->leafmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { /* If leafdata is on device but no gpu-aware mpi, we need a leafbuf on host to receive bcast'ed data */ 178*855db38dSJunchao Zhang if (!link->leafbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);} 179*855db38dSJunchao Zhang *leafbuf_mpi = link->leafbuf[PETSC_MEMTYPE_HOST]; 180*855db38dSJunchao Zhang *leafmtype_mpi = PETSC_MEMTYPE_HOST; 181*855db38dSJunchao Zhang } else if (op == MPIU_REPLACE) { /* If op is MPIU_REPLACE, we can directly bcast to leafdata. No intermediate buffer is needed. */ 182*855db38dSJunchao Zhang *leafbuf_mpi = (char *)link->lkey; 183*855db38dSJunchao Zhang *leafmtype_mpi = link->leafmtype; 184*855db38dSJunchao Zhang } else { /* Otherwise, op is a reduction. Have to allocate a buffer aside leafdata to apply the op. The buffer is either on host or device, depending on where leafdata is. */ 185*855db38dSJunchao Zhang if (!link->leafbuf[link->leafmtype]) {ierr = PetscMallocWithMemType(link->leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[link->leafmtype]);CHKERRQ(ierr);} 186*855db38dSJunchao Zhang *leafbuf_mpi = link->leafbuf[link->leafmtype]; 187*855db38dSJunchao Zhang *leafmtype_mpi = link->leafmtype; 188*855db38dSJunchao Zhang } 189*855db38dSJunchao Zhang PetscFunctionReturn(0); 190*855db38dSJunchao Zhang } 191*855db38dSJunchao Zhang 192eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 193dd5b3ca6SJunchao Zhang { 194dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 195eb02082bSJunchao Zhang PetscSFPack link; 196dd5b3ca6SJunchao Zhang PetscMPIInt sendcount; 197dd5b3ca6SJunchao Zhang MPI_Comm comm; 198*855db38dSJunchao Zhang const void *rootbuf_mpi; /* buffer used by MPI */ 199*855db38dSJunchao Zhang void *leafbuf_mpi; 200*855db38dSJunchao Zhang PetscMemType rootmtype_mpi,leafmtype_mpi; /* Seen by MPI */ 201dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 202dd5b3ca6SJunchao Zhang 203dd5b3ca6SJunchao Zhang PetscFunctionBegin; 204eb02082bSJunchao Zhang ierr = PetscSFPackGet_Allgatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 205dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 206dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr); 207*855db38dSJunchao Zhang ierr = PetscSFBcastPrepareMPIBuffers_Allgatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr); 208*855db38dSJunchao Zhang ierr = MPIU_Iallgatherv(rootbuf_mpi,sendcount,unit,leafbuf_mpi,dat->recvcounts,dat->displs,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype_mpi]);CHKERRQ(ierr); 209dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 210dd5b3ca6SJunchao Zhang } 211dd5b3ca6SJunchao Zhang 212eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 213dd5b3ca6SJunchao Zhang { 214dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 215eb02082bSJunchao Zhang PetscSFPack link; 216eb02082bSJunchao Zhang 217eb02082bSJunchao Zhang PetscFunctionBegin; 218eb02082bSJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 219*855db38dSJunchao Zhang ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 220*855db38dSJunchao Zhang if (op != MPIU_REPLACE) { 221*855db38dSJunchao Zhang /* Have a leaf buffer aside leafdata to do Op */ 222*855db38dSJunchao Zhang ierr = PetscSFUnpackAndOpLeafData(sf,link,NULL,leafdata,op,PETSC_FALSE);CHKERRQ(ierr); 223*855db38dSJunchao Zhang } else if (leafmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { 224*855db38dSJunchao Zhang /* Just need to copy data in leafbuf on host to leafdata on device */ 225*855db38dSJunchao Zhang ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_DEVICE,PETSC_MEMTYPE_HOST,leafdata,link->leafbuf[PETSC_MEMTYPE_HOST],link->leafbuflen*link->unitbytes);CHKERRQ(ierr); 226*855db38dSJunchao Zhang } 227eb02082bSJunchao Zhang ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr); 228eb02082bSJunchao Zhang PetscFunctionReturn(0); 229eb02082bSJunchao Zhang } 230eb02082bSJunchao Zhang 231*855db38dSJunchao Zhang /* 232*855db38dSJunchao Zhang Prepare the rootbuf, leafbuf etc used by MPI in PetscSFReduceBegin. 233*855db38dSJunchao Zhang 234*855db38dSJunchao Zhang Input Arguments: 235*855db38dSJunchao Zhang + sf - the start forest 236*855db38dSJunchao Zhang . link - the link PetscSFReduceBegin is currently using 237*855db38dSJunchao Zhang - op - the reduction op 238*855db38dSJunchao Zhang 239*855db38dSJunchao Zhang Output Arguments: 240*855db38dSJunchao Zhang +rootmtype_mpi - memtype of rootbuf_mpi 241*855db38dSJunchao Zhang .rootbuf_mpi - root buffer used by MPI in the following MPI call 242*855db38dSJunchao Zhang .leafmtype_mpi - memtype of leafbuf_mpi 243*855db38dSJunchao Zhang -leafbuf_mpi - leaf buffer used by MPI in the following MPI call 244*855db38dSJunchao Zhang 245*855db38dSJunchao Zhang Notes: This function is called assuming op != MPIU_REPLACE. 246*855db38dSJunchao Zhang */ 247*855db38dSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReducePrepareMPIBuffers_Allgatherv(PetscSF sf,PetscSFPack link,MPI_Op op,PetscMemType *rootmtype_mpi,void **rootbuf_mpi,PetscMemType *leafmtype_mpi,const void **leafbuf_mpi) 248*855db38dSJunchao Zhang { 249*855db38dSJunchao Zhang PetscErrorCode ierr; 250*855db38dSJunchao Zhang PetscMPIInt rank,count; 251*855db38dSJunchao Zhang MPI_Comm comm; 252*855db38dSJunchao Zhang const void *leafdata_mpi; 253*855db38dSJunchao Zhang 254*855db38dSJunchao Zhang PetscFunctionBegin; 255*855db38dSJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 256*855db38dSJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 257*855db38dSJunchao Zhang 258*855db38dSJunchao Zhang /* Step 1: Reduce leafdata on all ranks to leafbuf on rank 0 */ 259*855db38dSJunchao Zhang if (link->leafmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { /* Need to copy leafdata to leafbuf on every rank */ 260*855db38dSJunchao Zhang if (!link->leafbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);} 261*855db38dSJunchao Zhang ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_HOST,PETSC_MEMTYPE_DEVICE,link->leafbuf[PETSC_MEMTYPE_HOST],link->lkey,link->leafbuflen*link->unitbytes);CHKERRQ(ierr); 262*855db38dSJunchao Zhang leafdata_mpi = !rank ? MPI_IN_PLACE : link->leafbuf[PETSC_MEMTYPE_HOST]; 263*855db38dSJunchao Zhang *leafmtype_mpi = PETSC_MEMTYPE_HOST; 264*855db38dSJunchao Zhang } else { /* Only need to allocate a leafbuf on rank 0. Then directly reduce leafdata to the leafbuf */ 265*855db38dSJunchao Zhang if (!rank && !link->leafbuf[link->leafmtype]) {ierr = PetscMallocWithMemType(link->leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[link->leafmtype]);CHKERRQ(ierr);} 266*855db38dSJunchao Zhang leafdata_mpi = link->lkey; 267*855db38dSJunchao Zhang *leafmtype_mpi = link->leafmtype; 268*855db38dSJunchao Zhang } 269*855db38dSJunchao Zhang *leafbuf_mpi = (const char*)link->leafbuf[*leafmtype_mpi]; 270*855db38dSJunchao Zhang ierr = PetscMPIIntCast(sf->nleaves*link->bs,&count);CHKERRQ(ierr); 271*855db38dSJunchao Zhang ierr = MPI_Reduce(leafdata_mpi,(void*)(*leafbuf_mpi),count,link->basicunit,op,0,comm);CHKERRQ(ierr); /* Must do reduce with MPI builltin datatype basicunit */ 272*855db38dSJunchao Zhang 273*855db38dSJunchao Zhang /* Step 2: Prepare the root buffer (we'll scatter the reduction result to it in a moment) */ 274*855db38dSJunchao Zhang if (link->rootmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) *rootmtype_mpi = PETSC_MEMTYPE_HOST; 275*855db38dSJunchao Zhang else *rootmtype_mpi = link->rootmtype; 276*855db38dSJunchao Zhang 277*855db38dSJunchao Zhang if (!link->rootbuf[*rootmtype_mpi]) {ierr = PetscMallocWithMemType(*rootmtype_mpi,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[*rootmtype_mpi]);CHKERRQ(ierr);} 278*855db38dSJunchao Zhang *rootbuf_mpi = link->rootbuf[*rootmtype_mpi]; 279*855db38dSJunchao Zhang PetscFunctionReturn(0); 280*855db38dSJunchao Zhang } 281*855db38dSJunchao Zhang 282eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 283eb02082bSJunchao Zhang { 284eb02082bSJunchao Zhang PetscErrorCode ierr; 285eb02082bSJunchao Zhang PetscSFPack link; 286dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 287dd5b3ca6SJunchao Zhang PetscInt rstart; 288*855db38dSJunchao Zhang PetscMPIInt rank,recvcount; 289dd5b3ca6SJunchao Zhang MPI_Comm comm; 290*855db38dSJunchao Zhang const void *leafbuf_mpi; 291*855db38dSJunchao Zhang void *rootbuf_mpi; 292*855db38dSJunchao Zhang PetscMemType leafmtype_mpi,rootmtype_mpi; /* Seen by MPI */ 293dd5b3ca6SJunchao Zhang 294dd5b3ca6SJunchao Zhang PetscFunctionBegin; 295eb02082bSJunchao Zhang ierr = PetscSFPackGet_Allgatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 296dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 297dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 298dd5b3ca6SJunchao Zhang 299dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 300*855db38dSJunchao Zhang /* REPLACE is only meaningful when all processes have the same leafdata to reduce. Therefore copy from local leafdata is fine */ 301dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr); 302eb02082bSJunchao Zhang ierr = PetscMemcpyWithMemType(rootmtype,leafmtype,rootdata,(const char*)leafdata+(size_t)rstart*link->unitbytes,(size_t)sf->nroots*link->unitbytes);CHKERRQ(ierr); 303dd5b3ca6SJunchao Zhang } else { 304dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&recvcount);CHKERRQ(ierr); 305*855db38dSJunchao Zhang ierr = PetscSFReducePrepareMPIBuffers_Allgatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr); 306*855db38dSJunchao Zhang ierr = MPIU_Iscatterv(leafbuf_mpi,dat->recvcounts,dat->displs,unit,rootbuf_mpi,recvcount,unit,0,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype_mpi]);CHKERRQ(ierr); 307dd5b3ca6SJunchao Zhang } 308dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 309dd5b3ca6SJunchao Zhang } 310dd5b3ca6SJunchao Zhang 311eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 312dd5b3ca6SJunchao Zhang { 313dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 314eb02082bSJunchao Zhang PetscSFPack link; 315dd5b3ca6SJunchao Zhang 316dd5b3ca6SJunchao Zhang PetscFunctionBegin; 317eb02082bSJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 318*855db38dSJunchao Zhang ierr = PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 319*855db38dSJunchao Zhang if (op != MPIU_REPLACE) { 320*855db38dSJunchao Zhang ierr = PetscSFUnpackAndOpRootData(sf,link,NULL,rootdata,op,PETSC_FALSE);CHKERRQ(ierr); 321*855db38dSJunchao Zhang } else if (rootmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { 322*855db38dSJunchao Zhang ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_DEVICE,PETSC_MEMTYPE_HOST,rootdata,link->rootbuf[PETSC_MEMTYPE_HOST],link->rootbuflen*link->unitbytes);CHKERRQ(ierr); 323*855db38dSJunchao Zhang } 324eb02082bSJunchao Zhang ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr); 325eb02082bSJunchao Zhang PetscFunctionReturn(0); 326eb02082bSJunchao Zhang } 327eb02082bSJunchao Zhang 328eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastToZero_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata) 329eb02082bSJunchao Zhang { 330eb02082bSJunchao Zhang PetscErrorCode ierr; 331eb02082bSJunchao Zhang PetscSFPack link; 332*855db38dSJunchao Zhang PetscMPIInt rank; 333eb02082bSJunchao Zhang 334eb02082bSJunchao Zhang PetscFunctionBegin; 335eb02082bSJunchao Zhang ierr = PetscSFBcastAndOpBegin_Gatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,MPIU_REPLACE);CHKERRQ(ierr); 336eb02082bSJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 337*855db38dSJunchao Zhang ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 338*855db38dSJunchao Zhang ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 339*855db38dSJunchao Zhang if (!rank && leafmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { 340*855db38dSJunchao Zhang ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_DEVICE,PETSC_MEMTYPE_HOST,leafdata,link->leafbuf[PETSC_MEMTYPE_HOST],link->leafbuflen*link->unitbytes);CHKERRQ(ierr); 341*855db38dSJunchao Zhang } 342eb02082bSJunchao Zhang ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr); 343dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 344dd5b3ca6SJunchao Zhang } 345dd5b3ca6SJunchao Zhang 346dd5b3ca6SJunchao Zhang /* This routine is very tricky (I believe it is rarely used with this kind of graph so just provide a simple but not-optimal implementation). 347dd5b3ca6SJunchao Zhang 348dd5b3ca6SJunchao Zhang Suppose we have three ranks. Rank 0 has a root with value 1. Rank 0,1,2 has a leaf with value 2,3,4 respectively. The leaves are connected 349dd5b3ca6SJunchao Zhang to the root on rank 0. Suppose op=MPI_SUM and rank 0,1,2 gets root state in their rank order. By definition of this routine, rank 0 sees 1 350dd5b3ca6SJunchao Zhang in root, fetches it into its leafupate, then updates root to 1 + 2 = 3; rank 1 sees 3 in root, fetches it into its leafupate, then updates 351dd5b3ca6SJunchao Zhang root to 3 + 3 = 6; rank 2 sees 6 in root, fetches it into its leafupdate, then updates root to 6 + 4 = 10. At the end, leafupdate on rank 352dd5b3ca6SJunchao Zhang 0,1,2 is 1,3,6 respectively. root is 10. 353dd5b3ca6SJunchao Zhang 354dd5b3ca6SJunchao Zhang One optimized implementation could be: starting from the initial state: 355dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 356dd5b3ca6SJunchao Zhang Root 1 357dd5b3ca6SJunchao Zhang Leaf 2 3 4 358dd5b3ca6SJunchao Zhang 359dd5b3ca6SJunchao Zhang Shift leaves rightwards to leafupdate. Rank 0 gathers the root value and puts it in leafupdate. We have: 360dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 361dd5b3ca6SJunchao Zhang Root 1 362dd5b3ca6SJunchao Zhang Leaf 2 3 4 363dd5b3ca6SJunchao Zhang Leafupdate 1 2 3 364dd5b3ca6SJunchao Zhang 365dd5b3ca6SJunchao Zhang Then, do MPI_Scan on leafupdate and get: 366dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 367dd5b3ca6SJunchao Zhang Root 1 368dd5b3ca6SJunchao Zhang Leaf 2 3 4 369dd5b3ca6SJunchao Zhang Leafupdate 1 3 6 370dd5b3ca6SJunchao Zhang 371dd5b3ca6SJunchao Zhang Rank 2 sums its leaf and leafupdate, scatters the result to the root, and gets 372dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 373dd5b3ca6SJunchao Zhang Root 10 374dd5b3ca6SJunchao Zhang Leaf 2 3 4 375dd5b3ca6SJunchao Zhang Leafupdate 1 3 6 376dd5b3ca6SJunchao Zhang 377dd5b3ca6SJunchao Zhang We use a simpler implementation. From the same initial state, we copy leafdata to leafupdate 378dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 379dd5b3ca6SJunchao Zhang Root 1 380dd5b3ca6SJunchao Zhang Leaf 2 3 4 381dd5b3ca6SJunchao Zhang Leafupdate 2 3 4 382dd5b3ca6SJunchao Zhang 383dd5b3ca6SJunchao Zhang Do MPI_Exscan on leafupdate, 384dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 385dd5b3ca6SJunchao Zhang Root 1 386dd5b3ca6SJunchao Zhang Leaf 2 3 4 387dd5b3ca6SJunchao Zhang Leafupdate 2 2 5 388dd5b3ca6SJunchao Zhang 389dd5b3ca6SJunchao Zhang BcastAndOp from root to leafupdate, 390dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 391dd5b3ca6SJunchao Zhang Root 1 392dd5b3ca6SJunchao Zhang Leaf 2 3 4 393dd5b3ca6SJunchao Zhang Leafupdate 3 3 6 394dd5b3ca6SJunchao Zhang 395dd5b3ca6SJunchao Zhang Copy root to leafupdate on rank-0 396dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 397dd5b3ca6SJunchao Zhang Root 1 398dd5b3ca6SJunchao Zhang Leaf 2 3 4 399dd5b3ca6SJunchao Zhang Leafupdate 1 3 6 400dd5b3ca6SJunchao Zhang 401dd5b3ca6SJunchao Zhang Reduce from leaf to root, 402dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 403dd5b3ca6SJunchao Zhang Root 10 404dd5b3ca6SJunchao Zhang Leaf 2 3 4 405dd5b3ca6SJunchao Zhang Leafupdate 1 3 6 406dd5b3ca6SJunchao Zhang */ 407eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 408dd5b3ca6SJunchao Zhang { 409dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 410eb02082bSJunchao Zhang PetscSFPack link; 411dd5b3ca6SJunchao Zhang MPI_Comm comm; 412dd5b3ca6SJunchao Zhang PetscMPIInt count; 413dd5b3ca6SJunchao Zhang 414dd5b3ca6SJunchao Zhang PetscFunctionBegin; 415*855db38dSJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 416*855db38dSJunchao Zhang if (!use_gpu_aware_mpi && (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE)) SETERRQ(comm,PETSC_ERR_SUP,"No support for FetchAndOp"); /* No known uses */ 417dd5b3ca6SJunchao Zhang /* Copy leafdata to leafupdate */ 418eb02082bSJunchao Zhang ierr = PetscSFPackGet_Allgatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 419eb02082bSJunchao Zhang ierr = PetscMemcpyWithMemType(leafmtype,leafmtype,leafupdate,leafdata,sf->nleaves*link->unitbytes);CHKERRQ(ierr); 420eb02082bSJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 421dd5b3ca6SJunchao Zhang 422dd5b3ca6SJunchao Zhang /* Exscan on leafupdate and then BcastAndOp rootdata to leafupdate */ 423dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nleaves,&count);CHKERRQ(ierr); 424dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 425dd5b3ca6SJunchao Zhang PetscMPIInt size,rank,prev,next; 426dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 427dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 428dd5b3ca6SJunchao Zhang prev = rank ? rank-1 : MPI_PROC_NULL; 429dd5b3ca6SJunchao Zhang next = (rank < size-1) ? rank+1 : MPI_PROC_NULL; 430dd5b3ca6SJunchao Zhang ierr = MPI_Sendrecv_replace(leafupdate,count,unit,next,link->tag,prev,link->tag,comm,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 431dd5b3ca6SJunchao Zhang } else {ierr = MPI_Exscan(MPI_IN_PLACE,leafupdate,count,unit,op,comm);CHKERRQ(ierr);} 432eb02082bSJunchao Zhang ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr); 433dd5b3ca6SJunchao Zhang ierr = PetscSFBcastAndOpBegin(sf,unit,rootdata,leafupdate,op);CHKERRQ(ierr); 434dd5b3ca6SJunchao Zhang ierr = PetscSFBcastAndOpEnd(sf,unit,rootdata,leafupdate,op);CHKERRQ(ierr); 435dd5b3ca6SJunchao Zhang 436dd5b3ca6SJunchao Zhang /* Bcast roots to rank 0's leafupdate */ 437dd5b3ca6SJunchao Zhang ierr = PetscSFBcastToZero_Private(sf,unit,rootdata,leafupdate);CHKERRQ(ierr); /* Using this line makes Allgather SFs able to inherit this routine */ 438dd5b3ca6SJunchao Zhang 439dd5b3ca6SJunchao Zhang /* Reduce leafdata to rootdata */ 440dd5b3ca6SJunchao Zhang ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 441dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 442dd5b3ca6SJunchao Zhang } 443dd5b3ca6SJunchao Zhang 444eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 445dd5b3ca6SJunchao Zhang { 446dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 447dd5b3ca6SJunchao Zhang 448dd5b3ca6SJunchao Zhang PetscFunctionBegin; 449dd5b3ca6SJunchao Zhang ierr = PetscSFReduceEnd(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 450dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 451dd5b3ca6SJunchao Zhang } 452dd5b3ca6SJunchao Zhang 453dd5b3ca6SJunchao Zhang /* Get root ranks accessing my leaves */ 454dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetRootRanks_Allgatherv(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 455dd5b3ca6SJunchao Zhang { 456dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 457dd5b3ca6SJunchao Zhang PetscInt i,j,k,size; 458dd5b3ca6SJunchao Zhang const PetscInt *range; 459dd5b3ca6SJunchao Zhang 460dd5b3ca6SJunchao Zhang PetscFunctionBegin; 461dd5b3ca6SJunchao Zhang /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */ 462dd5b3ca6SJunchao Zhang if (sf->nranks && !sf->ranks) { /* On rank!=0, sf->nranks=0. The sf->nranks test makes this routine also works for sfgatherv */ 463dd5b3ca6SJunchao Zhang size = sf->nranks; 464dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr); 465dd5b3ca6SJunchao Zhang ierr = PetscMalloc4(size,&sf->ranks,size+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 466dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) sf->ranks[i] = i; 467da2e4c71SJunchao Zhang ierr = PetscArraycpy(sf->roffset,range,size+1);CHKERRQ(ierr); 468dd5b3ca6SJunchao Zhang for (i=0; i<sf->nleaves; i++) sf->rmine[i] = i; /*rmine are never NULL even for contiguous leaves */ 469dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) { 470dd5b3ca6SJunchao Zhang for (j=range[i],k=0; j<range[i+1]; j++,k++) sf->rremote[j] = k; 471dd5b3ca6SJunchao Zhang } 472dd5b3ca6SJunchao Zhang } 473dd5b3ca6SJunchao Zhang 474dd5b3ca6SJunchao Zhang if (nranks) *nranks = sf->nranks; 475dd5b3ca6SJunchao Zhang if (ranks) *ranks = sf->ranks; 476dd5b3ca6SJunchao Zhang if (roffset) *roffset = sf->roffset; 477dd5b3ca6SJunchao Zhang if (rmine) *rmine = sf->rmine; 478dd5b3ca6SJunchao Zhang if (rremote) *rremote = sf->rremote; 479dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 480dd5b3ca6SJunchao Zhang } 481dd5b3ca6SJunchao Zhang 482dd5b3ca6SJunchao Zhang /* Get leaf ranks accessing my roots */ 483dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Allgatherv(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 484dd5b3ca6SJunchao Zhang { 485dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 486dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 487dd5b3ca6SJunchao Zhang MPI_Comm comm; 488dd5b3ca6SJunchao Zhang PetscMPIInt size,rank; 489dd5b3ca6SJunchao Zhang PetscInt i,j; 490dd5b3ca6SJunchao Zhang 491dd5b3ca6SJunchao Zhang PetscFunctionBegin; 492dd5b3ca6SJunchao Zhang /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */ 493dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 494dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 495dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 496dd5b3ca6SJunchao Zhang if (niranks) *niranks = size; 497dd5b3ca6SJunchao Zhang 498dd5b3ca6SJunchao Zhang /* PetscSF_Basic has distinguished incoming ranks. Here we do not need that. But we must put self as the first and 499dd5b3ca6SJunchao Zhang sort other ranks. See comments in PetscSFSetUp_Basic about MatGetBrowsOfAoCols_MPIAIJ on why. 500dd5b3ca6SJunchao Zhang */ 501dd5b3ca6SJunchao Zhang if (iranks) { 502dd5b3ca6SJunchao Zhang if (!dat->iranks) { 503dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(size,&dat->iranks);CHKERRQ(ierr); 504dd5b3ca6SJunchao Zhang dat->iranks[0] = rank; 505dd5b3ca6SJunchao Zhang for (i=0,j=1; i<size; i++) {if (i == rank) continue; dat->iranks[j++] = i;} 506dd5b3ca6SJunchao Zhang } 507dd5b3ca6SJunchao Zhang *iranks = dat->iranks; /* dat->iranks was init'ed to NULL by PetscNewLog */ 508dd5b3ca6SJunchao Zhang } 509dd5b3ca6SJunchao Zhang 510dd5b3ca6SJunchao Zhang if (ioffset) { 511dd5b3ca6SJunchao Zhang if (!dat->ioffset) { 512dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(size+1,&dat->ioffset);CHKERRQ(ierr); 513dd5b3ca6SJunchao Zhang for (i=0; i<=size; i++) dat->ioffset[i] = i*sf->nroots; 514dd5b3ca6SJunchao Zhang } 515dd5b3ca6SJunchao Zhang *ioffset = dat->ioffset; 516dd5b3ca6SJunchao Zhang } 517dd5b3ca6SJunchao Zhang 518dd5b3ca6SJunchao Zhang if (irootloc) { 519dd5b3ca6SJunchao Zhang if (!dat->irootloc) { 520dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(sf->nleaves,&dat->irootloc);CHKERRQ(ierr); 521dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) { 522dd5b3ca6SJunchao Zhang for (j=0; j<sf->nroots; j++) dat->irootloc[i*sf->nroots+j] = j; 523dd5b3ca6SJunchao Zhang } 524dd5b3ca6SJunchao Zhang } 525dd5b3ca6SJunchao Zhang *irootloc = dat->irootloc; 526dd5b3ca6SJunchao Zhang } 527dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 528dd5b3ca6SJunchao Zhang } 529dd5b3ca6SJunchao Zhang 530dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Allgatherv(PetscSF sf,PetscSF *out) 531dd5b3ca6SJunchao Zhang { 532dd5b3ca6SJunchao Zhang PetscInt i,nroots,nleaves,rstart,*ilocal; 533dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 534dd5b3ca6SJunchao Zhang PetscSF lsf; 535dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 536dd5b3ca6SJunchao Zhang 537dd5b3ca6SJunchao Zhang PetscFunctionBegin; 538eb02082bSJunchao Zhang nleaves = sf->nleaves ? sf->nroots : 0; /* sf->nleaves can be zero with SFGather(v) */ 539eb02082bSJunchao Zhang nroots = nleaves; 540dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 541dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 542dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr); 543dd5b3ca6SJunchao Zhang 544dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 545dd5b3ca6SJunchao Zhang ilocal[i] = rstart + i; /* lsf does not change leave indices */ 546dd5b3ca6SJunchao Zhang iremote[i].rank = 0; /* rank in PETSC_COMM_SELF */ 547dd5b3ca6SJunchao Zhang iremote[i].index = i; /* root index */ 548dd5b3ca6SJunchao Zhang } 549dd5b3ca6SJunchao Zhang 550dd5b3ca6SJunchao Zhang ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 551dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraph(lsf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 552dd5b3ca6SJunchao Zhang ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 553dd5b3ca6SJunchao Zhang *out = lsf; 554dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 555dd5b3ca6SJunchao Zhang } 556dd5b3ca6SJunchao Zhang 557dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Allgatherv(PetscSF sf) 558dd5b3ca6SJunchao Zhang { 559dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 560dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 561dd5b3ca6SJunchao Zhang 562dd5b3ca6SJunchao Zhang PetscFunctionBegin; 563dd5b3ca6SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgatherv; 564dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 565dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 566dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 567dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 568dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 569dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Allgatherv; 570dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 571dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Allgatherv; 572dd5b3ca6SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 573dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Allgatherv; 574dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 575dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 576dd5b3ca6SJunchao Zhang sf->ops->BcastToZero = PetscSFBcastToZero_Allgatherv; 577dd5b3ca6SJunchao Zhang 578dd5b3ca6SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 579dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 580dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 581dd5b3ca6SJunchao Zhang } 582