1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang 3dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gatherv(PetscSF,MPI_Datatype,const void*,void*,MPI_Op); 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang /*===================================================================================*/ 6dd5b3ca6SJunchao Zhang /* Internal routines for PetscSFPack_Allgatherv */ 7dd5b3ca6SJunchao Zhang /*===================================================================================*/ 8dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGet_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFPack_Allgatherv *mylink) 9dd5b3ca6SJunchao Zhang { 10dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 11dd5b3ca6SJunchao Zhang PetscSFPack_Allgatherv link; 12dd5b3ca6SJunchao Zhang PetscSFPack *p; 13dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 14dd5b3ca6SJunchao Zhang 15dd5b3ca6SJunchao Zhang PetscFunctionBegin; 16dd5b3ca6SJunchao Zhang /* Look for types in cache */ 17dd5b3ca6SJunchao Zhang for (p=&dat->avail; (link=(PetscSFPack_Allgatherv)*p); p=&link->next) { 18dd5b3ca6SJunchao Zhang PetscBool match; 19dd5b3ca6SJunchao Zhang ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 20dd5b3ca6SJunchao Zhang if (match) { 21dd5b3ca6SJunchao Zhang *p = link->next; /* Remove from available list */ 22dd5b3ca6SJunchao Zhang goto found; 23dd5b3ca6SJunchao Zhang } 24dd5b3ca6SJunchao Zhang } 25dd5b3ca6SJunchao Zhang 26dd5b3ca6SJunchao Zhang ierr = PetscNew(&link);CHKERRQ(ierr); 27dd5b3ca6SJunchao Zhang ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr); 28dd5b3ca6SJunchao Zhang /* Must be init'ed to NULL so that we can safely wait on them even they are not used */ 29dd5b3ca6SJunchao Zhang link->request = MPI_REQUEST_NULL; 30dd5b3ca6SJunchao Zhang /* One tag per link */ 31dd5b3ca6SJunchao Zhang ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); 32dd5b3ca6SJunchao Zhang 33dd5b3ca6SJunchao Zhang /* DO NOT allocate link->root/leaf. We use lazy allocation since these buffers are likely not needed */ 34dd5b3ca6SJunchao Zhang found: 35dd5b3ca6SJunchao Zhang link->key = key; 36dd5b3ca6SJunchao Zhang link->next = dat->inuse; 37dd5b3ca6SJunchao Zhang dat->inuse = (PetscSFPack)link; 38dd5b3ca6SJunchao Zhang 39dd5b3ca6SJunchao Zhang *mylink = link; 40dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 41dd5b3ca6SJunchao Zhang } 42dd5b3ca6SJunchao Zhang 43dd5b3ca6SJunchao Zhang /*===================================================================================*/ 44dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 45dd5b3ca6SJunchao Zhang /*===================================================================================*/ 46dd5b3ca6SJunchao Zhang 47dd5b3ca6SJunchao Zhang /* PetscSFGetGraph is non-collective. An implementation should not have collective calls */ 48dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetGraph_Allgatherv(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 49dd5b3ca6SJunchao Zhang { 50dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 51dd5b3ca6SJunchao Zhang PetscInt i,j,k; 52dd5b3ca6SJunchao Zhang const PetscInt *range; 53dd5b3ca6SJunchao Zhang PetscMPIInt size; 54dd5b3ca6SJunchao Zhang 55dd5b3ca6SJunchao Zhang PetscFunctionBegin; 56dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 57dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 58dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 59dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous leaves */ 60dd5b3ca6SJunchao Zhang if (iremote) { 61dd5b3ca6SJunchao Zhang if (!sf->remote && sf->nleaves) { /* The && sf->nleaves makes sfgatherv able to inherit this routine */ 62dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr); 63dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr); 64dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 65dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) { 66dd5b3ca6SJunchao Zhang for (j=range[i],k=0; j<range[i+1]; j++,k++) { 67dd5b3ca6SJunchao Zhang sf->remote[j].rank = i; 68dd5b3ca6SJunchao Zhang sf->remote[j].index = k; 69dd5b3ca6SJunchao Zhang } 70dd5b3ca6SJunchao Zhang } 71dd5b3ca6SJunchao Zhang } 72dd5b3ca6SJunchao Zhang *iremote = sf->remote; 73dd5b3ca6SJunchao Zhang } 74dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 75dd5b3ca6SJunchao Zhang } 76dd5b3ca6SJunchao Zhang 77dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Allgatherv(PetscSF sf) 78dd5b3ca6SJunchao Zhang { 79dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 80dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 81dd5b3ca6SJunchao Zhang PetscMPIInt size; 82dd5b3ca6SJunchao Zhang PetscInt i; 83dd5b3ca6SJunchao Zhang const PetscInt *range; 84dd5b3ca6SJunchao Zhang 85dd5b3ca6SJunchao Zhang PetscFunctionBegin; 86dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 87dd5b3ca6SJunchao Zhang if (sf->nleaves) { /* This if (sf->nleaves) test makes sfgatherv able to inherit this routine */ 88dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(size,&dat->recvcounts);CHKERRQ(ierr); 89dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(size,&dat->displs);CHKERRQ(ierr); 90dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr); 91dd5b3ca6SJunchao Zhang 92dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) { 93dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(range[i],&dat->displs[i]);CHKERRQ(ierr); 94dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(range[i+1]-range[i],&dat->recvcounts[i]);CHKERRQ(ierr); 95dd5b3ca6SJunchao Zhang } 96dd5b3ca6SJunchao Zhang } 97dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 98dd5b3ca6SJunchao Zhang } 99dd5b3ca6SJunchao Zhang 100dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Allgatherv(PetscSF sf) 101dd5b3ca6SJunchao Zhang { 102dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 103dd5b3ca6SJunchao Zhang PetscSFPack_Allgatherv link,next; 104dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 105dd5b3ca6SJunchao Zhang 106dd5b3ca6SJunchao Zhang PetscFunctionBegin; 107dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->iranks);CHKERRQ(ierr); 108dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->ioffset);CHKERRQ(ierr); 109dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->irootloc);CHKERRQ(ierr); 110dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->recvcounts);CHKERRQ(ierr); 111dd5b3ca6SJunchao Zhang ierr = PetscFree(dat->displs);CHKERRQ(ierr); 112dd5b3ca6SJunchao Zhang if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 113dd5b3ca6SJunchao Zhang for (link=(PetscSFPack_Allgatherv)dat->avail; link; link=next) { 114dd5b3ca6SJunchao Zhang next = (PetscSFPack_Allgatherv)link->next; 115dd5b3ca6SJunchao Zhang if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);} 116dd5b3ca6SJunchao Zhang ierr = PetscFree(link->root);CHKERRQ(ierr); 117dd5b3ca6SJunchao Zhang ierr = PetscFree(link->leaf);CHKERRQ(ierr); 118dd5b3ca6SJunchao Zhang ierr = PetscFree(link);CHKERRQ(ierr); 119dd5b3ca6SJunchao Zhang } 120dd5b3ca6SJunchao Zhang dat->avail = NULL; 121dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 122dd5b3ca6SJunchao Zhang } 123dd5b3ca6SJunchao Zhang 124dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Allgatherv(PetscSF sf) 125dd5b3ca6SJunchao Zhang { 126dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 127dd5b3ca6SJunchao Zhang 128dd5b3ca6SJunchao Zhang PetscFunctionBegin; 129dd5b3ca6SJunchao Zhang ierr = PetscSFReset_Allgatherv(sf);CHKERRQ(ierr); 130dd5b3ca6SJunchao Zhang ierr = PetscFree(sf->data);CHKERRQ(ierr); 131dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 132dd5b3ca6SJunchao Zhang } 133dd5b3ca6SJunchao Zhang 134dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 135dd5b3ca6SJunchao Zhang { 136dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 137dd5b3ca6SJunchao Zhang PetscSFPack_Allgatherv link; 138dd5b3ca6SJunchao Zhang PetscMPIInt sendcount; 139dd5b3ca6SJunchao Zhang MPI_Comm comm; 140dd5b3ca6SJunchao Zhang void *recvbuf; 141dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 142dd5b3ca6SJunchao Zhang 143dd5b3ca6SJunchao Zhang PetscFunctionBegin; 144dd5b3ca6SJunchao Zhang ierr = PetscSFPackGet_Allgatherv(sf,unit,rootdata,&link);CHKERRQ(ierr); 145dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 146dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr); 147dd5b3ca6SJunchao Zhang 148dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 149dd5b3ca6SJunchao Zhang recvbuf = leafdata; 150dd5b3ca6SJunchao Zhang } else { 151dd5b3ca6SJunchao Zhang if (!link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);} 152dd5b3ca6SJunchao Zhang recvbuf = link->leaf; 153dd5b3ca6SJunchao Zhang } 154dd5b3ca6SJunchao Zhang ierr = MPIU_Iallgatherv(rootdata,sendcount,unit,recvbuf,dat->recvcounts,dat->displs,unit,comm,&link->request);CHKERRQ(ierr); 155dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 156dd5b3ca6SJunchao Zhang } 157dd5b3ca6SJunchao Zhang 158dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 159dd5b3ca6SJunchao Zhang { 160dd5b3ca6SJunchao Zhang PetscErrorCode ierr,(*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 161dd5b3ca6SJunchao Zhang PetscSFPack_Allgatherv link; 162dd5b3ca6SJunchao Zhang 163dd5b3ca6SJunchao Zhang PetscFunctionBegin; 164dd5b3ca6SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 165dd5b3ca6SJunchao Zhang ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 166dd5b3ca6SJunchao Zhang ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr); 167dd5b3ca6SJunchao Zhang 168dd5b3ca6SJunchao Zhang if (op != MPIU_REPLACE) { 169dd5b3ca6SJunchao Zhang if (UnpackAndOp) {ierr = (*UnpackAndOp)(sf->nleaves,link->bs,NULL,0,NULL,leafdata,link->leaf);CHKERRQ(ierr);} 170dd5b3ca6SJunchao Zhang #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 171dd5b3ca6SJunchao Zhang else { 172dd5b3ca6SJunchao Zhang /* op might be user-defined */ 173dd5b3ca6SJunchao Zhang PetscMPIInt count; 174dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nleaves,&count);CHKERRQ(ierr); 175dd5b3ca6SJunchao Zhang ierr = MPI_Reduce_local(link->leaf,leafdata,count,unit,op);CHKERRQ(ierr); 176dd5b3ca6SJunchao Zhang } 177dd5b3ca6SJunchao Zhang #else 178dd5b3ca6SJunchao Zhang else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 179dd5b3ca6SJunchao Zhang #endif 180dd5b3ca6SJunchao Zhang } 181dd5b3ca6SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 182dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 183dd5b3ca6SJunchao Zhang } 184dd5b3ca6SJunchao Zhang 185dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 186dd5b3ca6SJunchao Zhang { 187dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 188dd5b3ca6SJunchao Zhang PetscSFPack_Allgatherv link; 189dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 190dd5b3ca6SJunchao Zhang PetscInt rstart; 191dd5b3ca6SJunchao Zhang PetscMPIInt rank,count,recvcount; 192dd5b3ca6SJunchao Zhang MPI_Comm comm; 193dd5b3ca6SJunchao Zhang 194dd5b3ca6SJunchao Zhang PetscFunctionBegin; 195dd5b3ca6SJunchao Zhang ierr = PetscSFPackGet_Allgatherv(sf,unit,leafdata,&link);CHKERRQ(ierr); 196dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 197dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198dd5b3ca6SJunchao Zhang 199dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 200dd5b3ca6SJunchao Zhang /* REPLACE is only meaningful when all processes have the same leafdata to readue. Therefore copy from local leafdata is fine */ 201dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr); 202dd5b3ca6SJunchao Zhang ierr = PetscMemcpy(rootdata,(const char*)leafdata+(size_t)rstart*link->unitbytes,(size_t)sf->nroots*link->unitbytes);CHKERRQ(ierr); 203dd5b3ca6SJunchao Zhang } else { 204dd5b3ca6SJunchao Zhang /* Reduce all leafdata on rank 0, then scatter the result to root buffer, then reduce root buffer to leafdata */ 205dd5b3ca6SJunchao Zhang if (!rank && !link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);} 206dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nleaves*link->bs,&count);CHKERRQ(ierr); 207dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&recvcount);CHKERRQ(ierr); 208dd5b3ca6SJunchao Zhang ierr = MPI_Reduce(leafdata,link->leaf,count,link->basicunit,op,0,comm);CHKERRQ(ierr); /* Must do reduce with MPI builltin datatype basicunit */ 209dd5b3ca6SJunchao Zhang if (!link->root) {ierr = PetscMalloc(sf->nroots*link->unitbytes,&link->root);CHKERRQ(ierr);} 210dd5b3ca6SJunchao Zhang ierr = MPIU_Iscatterv(link->leaf,dat->recvcounts,dat->displs,unit,link->root,recvcount,unit,0,comm,&link->request);CHKERRQ(ierr); 211dd5b3ca6SJunchao Zhang } 212dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 213dd5b3ca6SJunchao Zhang } 214dd5b3ca6SJunchao Zhang 215dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 216dd5b3ca6SJunchao Zhang { 217dd5b3ca6SJunchao Zhang PetscErrorCode ierr,(*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 218dd5b3ca6SJunchao Zhang PetscSFPack_Allgatherv link; 219dd5b3ca6SJunchao Zhang 220dd5b3ca6SJunchao Zhang PetscFunctionBegin; 221dd5b3ca6SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 222dd5b3ca6SJunchao Zhang ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr); 223dd5b3ca6SJunchao Zhang if (op != MPIU_REPLACE) { 224dd5b3ca6SJunchao Zhang ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 225dd5b3ca6SJunchao Zhang if (UnpackAndOp) {ierr = (*UnpackAndOp)(sf->nroots,link->bs,NULL,0,NULL,rootdata,link->root);CHKERRQ(ierr);} 226dd5b3ca6SJunchao Zhang #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 227dd5b3ca6SJunchao Zhang else if (sf->nroots) { 228dd5b3ca6SJunchao Zhang /* op might be user-defined */ 229dd5b3ca6SJunchao Zhang PetscMPIInt count; 230dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&count);CHKERRQ(ierr); 231dd5b3ca6SJunchao Zhang ierr = MPI_Reduce_local(link->root,rootdata,count,unit,op);CHKERRQ(ierr); 232dd5b3ca6SJunchao Zhang } 233dd5b3ca6SJunchao Zhang #else 234dd5b3ca6SJunchao Zhang else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 235dd5b3ca6SJunchao Zhang #endif 236dd5b3ca6SJunchao Zhang } 237dd5b3ca6SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 238dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 239dd5b3ca6SJunchao Zhang } 240dd5b3ca6SJunchao Zhang 241dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFBcastToZero_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 242dd5b3ca6SJunchao Zhang { 243dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 244dd5b3ca6SJunchao Zhang PetscSFPack_Allgatherv link; 245dd5b3ca6SJunchao Zhang 246dd5b3ca6SJunchao Zhang PetscFunctionBegin; 247dd5b3ca6SJunchao Zhang ierr = PetscSFBcastAndOpBegin_Gatherv(sf,unit,rootdata,leafdata,MPIU_REPLACE);CHKERRQ(ierr); 248dd5b3ca6SJunchao Zhang /* A simplified PetscSFBcastAndOpEnd_Allgatherv */ 249dd5b3ca6SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 250dd5b3ca6SJunchao Zhang ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr); 251dd5b3ca6SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 252dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 253dd5b3ca6SJunchao Zhang } 254dd5b3ca6SJunchao Zhang 255dd5b3ca6SJunchao Zhang /* This routine is very tricky (I believe it is rarely used with this kind of graph so just provide a simple but not-optimal implementation). 256dd5b3ca6SJunchao Zhang 257dd5b3ca6SJunchao Zhang Suppose we have three ranks. Rank 0 has a root with value 1. Rank 0,1,2 has a leaf with value 2,3,4 respectively. The leaves are connected 258dd5b3ca6SJunchao Zhang to the root on rank 0. Suppose op=MPI_SUM and rank 0,1,2 gets root state in their rank order. By definition of this routine, rank 0 sees 1 259dd5b3ca6SJunchao Zhang in root, fetches it into its leafupate, then updates root to 1 + 2 = 3; rank 1 sees 3 in root, fetches it into its leafupate, then updates 260dd5b3ca6SJunchao Zhang root to 3 + 3 = 6; rank 2 sees 6 in root, fetches it into its leafupdate, then updates root to 6 + 4 = 10. At the end, leafupdate on rank 261dd5b3ca6SJunchao Zhang 0,1,2 is 1,3,6 respectively. root is 10. 262dd5b3ca6SJunchao Zhang 263dd5b3ca6SJunchao Zhang One optimized implementation could be: starting from the initial state: 264dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 265dd5b3ca6SJunchao Zhang Root 1 266dd5b3ca6SJunchao Zhang Leaf 2 3 4 267dd5b3ca6SJunchao Zhang 268dd5b3ca6SJunchao Zhang Shift leaves rightwards to leafupdate. Rank 0 gathers the root value and puts it in leafupdate. We have: 269dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 270dd5b3ca6SJunchao Zhang Root 1 271dd5b3ca6SJunchao Zhang Leaf 2 3 4 272dd5b3ca6SJunchao Zhang Leafupdate 1 2 3 273dd5b3ca6SJunchao Zhang 274dd5b3ca6SJunchao Zhang Then, do MPI_Scan on leafupdate and get: 275dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 276dd5b3ca6SJunchao Zhang Root 1 277dd5b3ca6SJunchao Zhang Leaf 2 3 4 278dd5b3ca6SJunchao Zhang Leafupdate 1 3 6 279dd5b3ca6SJunchao Zhang 280dd5b3ca6SJunchao Zhang Rank 2 sums its leaf and leafupdate, scatters the result to the root, and gets 281dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 282dd5b3ca6SJunchao Zhang Root 10 283dd5b3ca6SJunchao Zhang Leaf 2 3 4 284dd5b3ca6SJunchao Zhang Leafupdate 1 3 6 285dd5b3ca6SJunchao Zhang 286dd5b3ca6SJunchao Zhang We use a simpler implementation. From the same initial state, we copy leafdata to leafupdate 287dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 288dd5b3ca6SJunchao Zhang Root 1 289dd5b3ca6SJunchao Zhang Leaf 2 3 4 290dd5b3ca6SJunchao Zhang Leafupdate 2 3 4 291dd5b3ca6SJunchao Zhang 292dd5b3ca6SJunchao Zhang Do MPI_Exscan on leafupdate, 293dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 294dd5b3ca6SJunchao Zhang Root 1 295dd5b3ca6SJunchao Zhang Leaf 2 3 4 296dd5b3ca6SJunchao Zhang Leafupdate 2 2 5 297dd5b3ca6SJunchao Zhang 298dd5b3ca6SJunchao Zhang BcastAndOp from root to leafupdate, 299dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 300dd5b3ca6SJunchao Zhang Root 1 301dd5b3ca6SJunchao Zhang Leaf 2 3 4 302dd5b3ca6SJunchao Zhang Leafupdate 3 3 6 303dd5b3ca6SJunchao Zhang 304dd5b3ca6SJunchao Zhang Copy root to leafupdate on rank-0 305dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 306dd5b3ca6SJunchao Zhang Root 1 307dd5b3ca6SJunchao Zhang Leaf 2 3 4 308dd5b3ca6SJunchao Zhang Leafupdate 1 3 6 309dd5b3ca6SJunchao Zhang 310dd5b3ca6SJunchao Zhang Reduce from leaf to root, 311dd5b3ca6SJunchao Zhang rank-0 rank-1 rank-2 312dd5b3ca6SJunchao Zhang Root 10 313dd5b3ca6SJunchao Zhang Leaf 2 3 4 314dd5b3ca6SJunchao Zhang Leafupdate 1 3 6 315dd5b3ca6SJunchao Zhang */ 316dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 317dd5b3ca6SJunchao Zhang { 318dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 319dd5b3ca6SJunchao Zhang PetscSFPack_Allgatherv link; 320dd5b3ca6SJunchao Zhang MPI_Comm comm; 321dd5b3ca6SJunchao Zhang PetscMPIInt count; 322dd5b3ca6SJunchao Zhang 323dd5b3ca6SJunchao Zhang PetscFunctionBegin; 324dd5b3ca6SJunchao Zhang /* Copy leafdata to leafupdate */ 325dd5b3ca6SJunchao Zhang ierr = PetscSFPackGet_Allgatherv(sf,unit,leafdata,&link);CHKERRQ(ierr); 326dd5b3ca6SJunchao Zhang ierr = PetscMemcpy(leafupdate,leafdata,sf->nleaves*link->unitbytes);CHKERRQ(ierr); 327dd5b3ca6SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 328dd5b3ca6SJunchao Zhang 329dd5b3ca6SJunchao Zhang /* Exscan on leafupdate and then BcastAndOp rootdata to leafupdate */ 330dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 331dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nleaves,&count);CHKERRQ(ierr); 332dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 333dd5b3ca6SJunchao Zhang PetscMPIInt size,rank,prev,next; 334dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 335dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 336dd5b3ca6SJunchao Zhang prev = rank ? rank-1 : MPI_PROC_NULL; 337dd5b3ca6SJunchao Zhang next = (rank < size-1) ? rank+1 : MPI_PROC_NULL; 338dd5b3ca6SJunchao Zhang ierr = MPI_Sendrecv_replace(leafupdate,count,unit,next,link->tag,prev,link->tag,comm,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 339dd5b3ca6SJunchao Zhang } else {ierr = MPI_Exscan(MPI_IN_PLACE,leafupdate,count,unit,op,comm);CHKERRQ(ierr);} 340dd5b3ca6SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 341dd5b3ca6SJunchao Zhang ierr = PetscSFBcastAndOpBegin(sf,unit,rootdata,leafupdate,op);CHKERRQ(ierr); 342dd5b3ca6SJunchao Zhang ierr = PetscSFBcastAndOpEnd(sf,unit,rootdata,leafupdate,op);CHKERRQ(ierr); 343dd5b3ca6SJunchao Zhang 344dd5b3ca6SJunchao Zhang /* Bcast roots to rank 0's leafupdate */ 345dd5b3ca6SJunchao Zhang ierr = PetscSFBcastToZero_Private(sf,unit,rootdata,leafupdate);CHKERRQ(ierr); /* Using this line makes Allgather SFs able to inherit this routine */ 346dd5b3ca6SJunchao Zhang 347dd5b3ca6SJunchao Zhang /* Reduce leafdata to rootdata */ 348dd5b3ca6SJunchao Zhang ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 349dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 350dd5b3ca6SJunchao Zhang } 351dd5b3ca6SJunchao Zhang 352dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 353dd5b3ca6SJunchao Zhang { 354dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 355dd5b3ca6SJunchao Zhang 356dd5b3ca6SJunchao Zhang PetscFunctionBegin; 357dd5b3ca6SJunchao Zhang ierr = PetscSFReduceEnd(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 358dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 359dd5b3ca6SJunchao Zhang } 360dd5b3ca6SJunchao Zhang 361dd5b3ca6SJunchao Zhang /* Get root ranks accessing my leaves */ 362dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetRootRanks_Allgatherv(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 363dd5b3ca6SJunchao Zhang { 364dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 365dd5b3ca6SJunchao Zhang PetscInt i,j,k,size; 366dd5b3ca6SJunchao Zhang const PetscInt *range; 367dd5b3ca6SJunchao Zhang 368dd5b3ca6SJunchao Zhang PetscFunctionBegin; 369dd5b3ca6SJunchao Zhang /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */ 370dd5b3ca6SJunchao Zhang if (sf->nranks && !sf->ranks) { /* On rank!=0, sf->nranks=0. The sf->nranks test makes this routine also works for sfgatherv */ 371dd5b3ca6SJunchao Zhang size = sf->nranks; 372dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr); 373dd5b3ca6SJunchao Zhang ierr = PetscMalloc4(size,&sf->ranks,size+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 374dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) sf->ranks[i] = i; 375*da2e4c71SJunchao Zhang ierr = PetscArraycpy(sf->roffset,range,size+1);CHKERRQ(ierr); 376dd5b3ca6SJunchao Zhang for (i=0; i<sf->nleaves; i++) sf->rmine[i] = i; /*rmine are never NULL even for contiguous leaves */ 377dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) { 378dd5b3ca6SJunchao Zhang for (j=range[i],k=0; j<range[i+1]; j++,k++) sf->rremote[j] = k; 379dd5b3ca6SJunchao Zhang } 380dd5b3ca6SJunchao Zhang } 381dd5b3ca6SJunchao Zhang 382dd5b3ca6SJunchao Zhang if (nranks) *nranks = sf->nranks; 383dd5b3ca6SJunchao Zhang if (ranks) *ranks = sf->ranks; 384dd5b3ca6SJunchao Zhang if (roffset) *roffset = sf->roffset; 385dd5b3ca6SJunchao Zhang if (rmine) *rmine = sf->rmine; 386dd5b3ca6SJunchao Zhang if (rremote) *rremote = sf->rremote; 387dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 388dd5b3ca6SJunchao Zhang } 389dd5b3ca6SJunchao Zhang 390dd5b3ca6SJunchao Zhang /* Get leaf ranks accessing my roots */ 391dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Allgatherv(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 392dd5b3ca6SJunchao Zhang { 393dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 394dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 395dd5b3ca6SJunchao Zhang MPI_Comm comm; 396dd5b3ca6SJunchao Zhang PetscMPIInt size,rank; 397dd5b3ca6SJunchao Zhang PetscInt i,j; 398dd5b3ca6SJunchao Zhang 399dd5b3ca6SJunchao Zhang PetscFunctionBegin; 400dd5b3ca6SJunchao Zhang /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */ 401dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 402dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 403dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 404dd5b3ca6SJunchao Zhang if (niranks) *niranks = size; 405dd5b3ca6SJunchao Zhang 406dd5b3ca6SJunchao Zhang /* PetscSF_Basic has distinguished incoming ranks. Here we do not need that. But we must put self as the first and 407dd5b3ca6SJunchao Zhang sort other ranks. See comments in PetscSFSetUp_Basic about MatGetBrowsOfAoCols_MPIAIJ on why. 408dd5b3ca6SJunchao Zhang */ 409dd5b3ca6SJunchao Zhang if (iranks) { 410dd5b3ca6SJunchao Zhang if (!dat->iranks) { 411dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(size,&dat->iranks);CHKERRQ(ierr); 412dd5b3ca6SJunchao Zhang dat->iranks[0] = rank; 413dd5b3ca6SJunchao Zhang for (i=0,j=1; i<size; i++) {if (i == rank) continue; dat->iranks[j++] = i;} 414dd5b3ca6SJunchao Zhang } 415dd5b3ca6SJunchao Zhang *iranks = dat->iranks; /* dat->iranks was init'ed to NULL by PetscNewLog */ 416dd5b3ca6SJunchao Zhang } 417dd5b3ca6SJunchao Zhang 418dd5b3ca6SJunchao Zhang if (ioffset) { 419dd5b3ca6SJunchao Zhang if (!dat->ioffset) { 420dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(size+1,&dat->ioffset);CHKERRQ(ierr); 421dd5b3ca6SJunchao Zhang for (i=0; i<=size; i++) dat->ioffset[i] = i*sf->nroots; 422dd5b3ca6SJunchao Zhang } 423dd5b3ca6SJunchao Zhang *ioffset = dat->ioffset; 424dd5b3ca6SJunchao Zhang } 425dd5b3ca6SJunchao Zhang 426dd5b3ca6SJunchao Zhang if (irootloc) { 427dd5b3ca6SJunchao Zhang if (!dat->irootloc) { 428dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(sf->nleaves,&dat->irootloc);CHKERRQ(ierr); 429dd5b3ca6SJunchao Zhang for (i=0; i<size; i++) { 430dd5b3ca6SJunchao Zhang for (j=0; j<sf->nroots; j++) dat->irootloc[i*sf->nroots+j] = j; 431dd5b3ca6SJunchao Zhang } 432dd5b3ca6SJunchao Zhang } 433dd5b3ca6SJunchao Zhang *irootloc = dat->irootloc; 434dd5b3ca6SJunchao Zhang } 435dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 436dd5b3ca6SJunchao Zhang } 437dd5b3ca6SJunchao Zhang 438dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Allgatherv(PetscSF sf,PetscSF *out) 439dd5b3ca6SJunchao Zhang { 440dd5b3ca6SJunchao Zhang PetscInt i,nroots,nleaves,rstart,*ilocal; 441dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 442dd5b3ca6SJunchao Zhang PetscSF lsf; 443dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 444dd5b3ca6SJunchao Zhang 445dd5b3ca6SJunchao Zhang PetscFunctionBegin; 446dd5b3ca6SJunchao Zhang nroots = sf->nroots; 447dd5b3ca6SJunchao Zhang nleaves = sf->nleaves ? sf->nroots : 0; 448dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 449dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 450dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr); 451dd5b3ca6SJunchao Zhang 452dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 453dd5b3ca6SJunchao Zhang ilocal[i] = rstart + i; /* lsf does not change leave indices */ 454dd5b3ca6SJunchao Zhang iremote[i].rank = 0; /* rank in PETSC_COMM_SELF */ 455dd5b3ca6SJunchao Zhang iremote[i].index = i; /* root index */ 456dd5b3ca6SJunchao Zhang } 457dd5b3ca6SJunchao Zhang 458dd5b3ca6SJunchao Zhang ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 459dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraph(lsf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 460dd5b3ca6SJunchao Zhang ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 461dd5b3ca6SJunchao Zhang *out = lsf; 462dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 463dd5b3ca6SJunchao Zhang } 464dd5b3ca6SJunchao Zhang 465dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Allgatherv(PetscSF sf) 466dd5b3ca6SJunchao Zhang { 467dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 468dd5b3ca6SJunchao Zhang PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data; 469dd5b3ca6SJunchao Zhang 470dd5b3ca6SJunchao Zhang PetscFunctionBegin; 471dd5b3ca6SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgatherv; 472dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 473dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 474dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 475dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 476dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 477dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Allgatherv; 478dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 479dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Allgatherv; 480dd5b3ca6SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 481dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Allgatherv; 482dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 483dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 484dd5b3ca6SJunchao Zhang sf->ops->BcastToZero = PetscSFBcastToZero_Allgatherv; 485dd5b3ca6SJunchao Zhang 486dd5b3ca6SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 487dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 488dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 489dd5b3ca6SJunchao Zhang } 490