1c4762a1bSJed Brown static char help[] = "Demonstrates BuildTwoSided functions.\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscsys.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown typedef struct { 6c4762a1bSJed Brown PetscInt rank; 7c4762a1bSJed Brown PetscScalar value; 8c4762a1bSJed Brown char ok[3]; 9c4762a1bSJed Brown } Unit; 10c4762a1bSJed Brown 11c4762a1bSJed Brown static PetscErrorCode MakeDatatype(MPI_Datatype *dtype) 12c4762a1bSJed Brown { 13c4762a1bSJed Brown MPI_Datatype dtypes[3],tmptype; 14c4762a1bSJed Brown PetscMPIInt lengths[3]; 15c4762a1bSJed Brown MPI_Aint displs[3]; 16c4762a1bSJed Brown Unit dummy; 17c4762a1bSJed Brown 18c4762a1bSJed Brown PetscFunctionBegin; 19c4762a1bSJed Brown dtypes[0] = MPIU_INT; 20c4762a1bSJed Brown dtypes[1] = MPIU_SCALAR; 21c4762a1bSJed Brown dtypes[2] = MPI_CHAR; 22c4762a1bSJed Brown lengths[0] = 1; 23c4762a1bSJed Brown lengths[1] = 1; 24c4762a1bSJed Brown lengths[2] = 3; 25c4762a1bSJed Brown /* Curse the evil beings that made std::complex a non-POD type. */ 26c4762a1bSJed Brown displs[0] = (char*)&dummy.rank - (char*)&dummy; /* offsetof(Unit,rank); */ 27c4762a1bSJed Brown displs[1] = (char*)&dummy.value - (char*)&dummy; /* offsetof(Unit,value); */ 28c4762a1bSJed Brown displs[2] = (char*)&dummy.ok - (char*)&dummy; /* offsetof(Unit,ok); */ 295f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_create_struct(3,lengths,displs,dtypes,&tmptype)); 305f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_commit(&tmptype)); 315f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_create_resized(tmptype,0,sizeof(Unit),dtype)); 325f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_commit(dtype)); 335f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_free(&tmptype)); 34c4762a1bSJed Brown { 35c4762a1bSJed Brown MPI_Aint lb,extent; 365f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_get_extent(*dtype,&lb,&extent)); 372c71b3e2SJacob Faibussowitsch PetscCheckFalse(extent != sizeof(Unit),PETSC_COMM_WORLD,PETSC_ERR_LIB,"New type has extent %d != sizeof(Unit) %d",(int)extent,(int)sizeof(Unit)); 38c4762a1bSJed Brown } 39c4762a1bSJed Brown PetscFunctionReturn(0); 40c4762a1bSJed Brown } 41c4762a1bSJed Brown 42c4762a1bSJed Brown struct FCtx { 43c4762a1bSJed Brown PetscMPIInt rank; 44c4762a1bSJed Brown PetscMPIInt nto; 45c4762a1bSJed Brown PetscMPIInt *toranks; 46c4762a1bSJed Brown Unit *todata; 47c4762a1bSJed Brown PetscSegBuffer seg; 48c4762a1bSJed Brown }; 49c4762a1bSJed Brown 50c4762a1bSJed Brown static PetscErrorCode FSend(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt tonum,PetscMPIInt rank,void *todata,MPI_Request req[],void *ctx) 51c4762a1bSJed Brown { 52c4762a1bSJed Brown struct FCtx *fctx = (struct FCtx*)ctx; 53c4762a1bSJed Brown 54c4762a1bSJed Brown PetscFunctionBegin; 552c71b3e2SJacob Faibussowitsch PetscCheckFalse(rank != fctx->toranks[tonum],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Rank %d does not match toranks[%d] %d",rank,tonum,fctx->toranks[tonum]); 562c71b3e2SJacob Faibussowitsch PetscCheckFalse(fctx->rank != *(PetscMPIInt*)todata,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Todata %d does not match rank %d",*(PetscMPIInt*)todata,fctx->rank); 575f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(&fctx->todata[tonum].rank,1,MPIU_INT,rank,tag[0],comm,&req[0])); 585f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(&fctx->todata[tonum].value,1,MPIU_SCALAR,rank,tag[1],comm,&req[1])); 59c4762a1bSJed Brown PetscFunctionReturn(0); 60c4762a1bSJed Brown } 61c4762a1bSJed Brown 62c4762a1bSJed Brown static PetscErrorCode FRecv(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *fromdata,MPI_Request req[],void *ctx) 63c4762a1bSJed Brown { 64c4762a1bSJed Brown struct FCtx *fctx = (struct FCtx*)ctx; 65c4762a1bSJed Brown Unit *buf; 66c4762a1bSJed Brown 67c4762a1bSJed Brown PetscFunctionBegin; 682c71b3e2SJacob Faibussowitsch PetscCheckFalse(*(PetscMPIInt*)fromdata != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Dummy data %d from rank %d corrupt",*(PetscMPIInt*)fromdata,rank); 695f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSegBufferGet(fctx->seg,1,&buf)); 705f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Irecv(&buf->rank,1,MPIU_INT,rank,tag[0],comm,&req[0])); 715f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Irecv(&buf->value,1,MPIU_SCALAR,rank,tag[1],comm,&req[1])); 72c4762a1bSJed Brown buf->ok[0] = 'o'; 73c4762a1bSJed Brown buf->ok[1] = 'k'; 74c4762a1bSJed Brown buf->ok[2] = 0; 75c4762a1bSJed Brown PetscFunctionReturn(0); 76c4762a1bSJed Brown } 77c4762a1bSJed Brown 78c4762a1bSJed Brown int main(int argc,char **argv) 79c4762a1bSJed Brown { 80c4762a1bSJed Brown PetscErrorCode ierr; 81c4762a1bSJed Brown PetscMPIInt rank,size,*toranks,*fromranks,nto,nfrom; 82c4762a1bSJed Brown PetscInt i,n; 83c4762a1bSJed Brown PetscBool verbose,build_twosided_f; 84c4762a1bSJed Brown Unit *todata,*fromdata; 85c4762a1bSJed Brown MPI_Datatype dtype; 86c4762a1bSJed Brown 87c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 885f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 895f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 90c4762a1bSJed Brown 91c4762a1bSJed Brown verbose = PETSC_FALSE; 925f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-verbose",&verbose,NULL)); 93c4762a1bSJed Brown build_twosided_f = PETSC_FALSE; 945f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-build_twosided_f",&build_twosided_f,NULL)); 95c4762a1bSJed Brown 96c4762a1bSJed Brown for (i=1,nto=0; i<size; i*=2) nto++; 975f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(nto,&todata,nto,&toranks)); 98c4762a1bSJed Brown for (n=0,i=1; i<size; n++,i*=2) { 99c4762a1bSJed Brown toranks[n] = (rank+i) % size; 100c4762a1bSJed Brown todata[n].rank = (rank+i) % size; 101c4762a1bSJed Brown todata[n].value = (PetscScalar)rank; 102c4762a1bSJed Brown todata[n].ok[0] = 'o'; 103c4762a1bSJed Brown todata[n].ok[1] = 'k'; 104c4762a1bSJed Brown todata[n].ok[2] = 0; 105c4762a1bSJed Brown } 106c4762a1bSJed Brown if (verbose) { 107c4762a1bSJed Brown for (i=0; i<nto; i++) { 1085f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] TO %d: {%" PetscInt_FMT ", %g, \"%s\"}\n",rank,toranks[i],todata[i].rank,(double)PetscRealPart(todata[i].value),todata[i].ok)); 109c4762a1bSJed Brown } 1105f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT)); 111c4762a1bSJed Brown } 112c4762a1bSJed Brown 1135f80ce2aSJacob Faibussowitsch CHKERRQ(MakeDatatype(&dtype)); 114c4762a1bSJed Brown 115c4762a1bSJed Brown if (build_twosided_f) { 116c4762a1bSJed Brown struct FCtx fctx; 117c4762a1bSJed Brown PetscMPIInt *todummy,*fromdummy; 118c4762a1bSJed Brown fctx.rank = rank; 119c4762a1bSJed Brown fctx.nto = nto; 120c4762a1bSJed Brown fctx.toranks = toranks; 121c4762a1bSJed Brown fctx.todata = todata; 1225f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSegBufferCreate(sizeof(Unit),1,&fctx.seg)); 1235f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nto,&todummy)); 124c4762a1bSJed Brown for (i=0; i<nto; i++) todummy[i] = rank; 1255f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommBuildTwoSidedF(PETSC_COMM_WORLD,1,MPI_INT,nto,toranks,todummy,&nfrom,&fromranks,&fromdummy,2,FSend,FRecv,&fctx)); 1265f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(todummy)); 1275f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(fromdummy)); 1285f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSegBufferExtractAlloc(fctx.seg,&fromdata)); 1295f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSegBufferDestroy(&fctx.seg)); 130c4762a1bSJed Brown } else { 1315f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommBuildTwoSided(PETSC_COMM_WORLD,1,dtype,nto,toranks,todata,&nfrom,&fromranks,&fromdata)); 132c4762a1bSJed Brown } 1335f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_free(&dtype)); 134c4762a1bSJed Brown 135c4762a1bSJed Brown if (verbose) { 136c4762a1bSJed Brown PetscInt *iranks,*iperm; 1375f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(nfrom,&iranks,nfrom,&iperm)); 138c4762a1bSJed Brown for (i=0; i<nfrom; i++) { 139c4762a1bSJed Brown iranks[i] = fromranks[i]; 140c4762a1bSJed Brown iperm[i] = i; 141c4762a1bSJed Brown } 142c4762a1bSJed Brown /* Receive ordering is non-deterministic in general, so sort to make verbose output deterministic. */ 1435f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortIntWithPermutation(nfrom,iranks,iperm)); 144c4762a1bSJed Brown for (i=0; i<nfrom; i++) { 145c4762a1bSJed Brown PetscInt ip = iperm[i]; 1465f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] FROM %d: {%" PetscInt_FMT ", %g, \"%s\"}\n",rank,fromranks[ip],fromdata[ip].rank,(double)PetscRealPart(fromdata[ip].value),fromdata[ip].ok)); 147c4762a1bSJed Brown } 1485f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT)); 1495f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(iranks,iperm)); 150c4762a1bSJed Brown } 151c4762a1bSJed Brown 1522c71b3e2SJacob Faibussowitsch PetscCheckFalse(nto != nfrom,PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] From ranks %d does not match To ranks %d",rank,nto,nfrom); 153c4762a1bSJed Brown for (i=1; i<size; i*=2) { 154c4762a1bSJed Brown PetscMPIInt expected_rank = (rank-i+size)%size; 155c4762a1bSJed Brown PetscBool flg; 156c4762a1bSJed Brown for (n=0; n<nfrom; n++) { 157c4762a1bSJed Brown if (expected_rank == fromranks[n]) goto found; 158c4762a1bSJed Brown } 15998921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"[%d] Could not find expected from rank %d",rank,expected_rank); 160c4762a1bSJed Brown found: 1612c71b3e2SJacob Faibussowitsch PetscCheckFalse(PetscRealPart(fromdata[n].value) != expected_rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got data %g from rank %d",rank,(double)PetscRealPart(fromdata[n].value),expected_rank); 1625f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(fromdata[n].ok,"ok",&flg)); 163*28b400f6SJacob Faibussowitsch PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got string %s from rank %d",rank,fromdata[n].ok,expected_rank); 164c4762a1bSJed Brown } 1655f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(todata,toranks)); 1665f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(fromdata)); 1675f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(fromranks)); 168c4762a1bSJed Brown ierr = PetscFinalize(); 169c4762a1bSJed Brown return ierr; 170c4762a1bSJed Brown } 171c4762a1bSJed Brown 172c4762a1bSJed Brown /*TEST 173c4762a1bSJed Brown 174c4762a1bSJed Brown test: 175c4762a1bSJed Brown nsize: 4 176c4762a1bSJed Brown args: -verbose -build_twosided allreduce 177c4762a1bSJed Brown 178c4762a1bSJed Brown test: 179c4762a1bSJed Brown suffix: f 180c4762a1bSJed Brown nsize: 4 181c4762a1bSJed Brown args: -verbose -build_twosided_f -build_twosided allreduce 182c4762a1bSJed Brown output_file: output/ex8_1.out 183c4762a1bSJed Brown 184c4762a1bSJed Brown test: 185c4762a1bSJed Brown suffix: f_ibarrier 186c4762a1bSJed Brown nsize: 4 187c4762a1bSJed Brown args: -verbose -build_twosided_f -build_twosided ibarrier 188c4762a1bSJed Brown output_file: output/ex8_1.out 1898ee3e7ecSJunchao Zhang requires: defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 190c4762a1bSJed Brown 191c4762a1bSJed Brown test: 192c4762a1bSJed Brown suffix: ibarrier 193c4762a1bSJed Brown nsize: 4 194c4762a1bSJed Brown args: -verbose -build_twosided ibarrier 195c4762a1bSJed Brown output_file: output/ex8_1.out 1968ee3e7ecSJunchao Zhang requires: defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 197c4762a1bSJed Brown 198c4762a1bSJed Brown test: 199c4762a1bSJed Brown suffix: redscatter 200c4762a1bSJed Brown requires: mpi_reduce_scatter_block 201c4762a1bSJed Brown nsize: 4 202c4762a1bSJed Brown args: -verbose -build_twosided redscatter 203c4762a1bSJed Brown output_file: output/ex8_1.out 204c4762a1bSJed Brown 205c4762a1bSJed Brown TEST*/ 206