1*c4762a1bSJed Brown static char help[] = "Demonstrates BuildTwoSided functions.\n"; 2*c4762a1bSJed Brown 3*c4762a1bSJed Brown #include <petscsys.h> 4*c4762a1bSJed Brown 5*c4762a1bSJed Brown typedef struct { 6*c4762a1bSJed Brown PetscInt rank; 7*c4762a1bSJed Brown PetscScalar value; 8*c4762a1bSJed Brown char ok[3]; 9*c4762a1bSJed Brown } Unit; 10*c4762a1bSJed Brown 11*c4762a1bSJed Brown static PetscErrorCode MakeDatatype(MPI_Datatype *dtype) 12*c4762a1bSJed Brown { 13*c4762a1bSJed Brown PetscErrorCode ierr; 14*c4762a1bSJed Brown MPI_Datatype dtypes[3],tmptype; 15*c4762a1bSJed Brown PetscMPIInt lengths[3]; 16*c4762a1bSJed Brown MPI_Aint displs[3]; 17*c4762a1bSJed Brown Unit dummy; 18*c4762a1bSJed Brown 19*c4762a1bSJed Brown PetscFunctionBegin; 20*c4762a1bSJed Brown dtypes[0] = MPIU_INT; 21*c4762a1bSJed Brown dtypes[1] = MPIU_SCALAR; 22*c4762a1bSJed Brown dtypes[2] = MPI_CHAR; 23*c4762a1bSJed Brown lengths[0] = 1; 24*c4762a1bSJed Brown lengths[1] = 1; 25*c4762a1bSJed Brown lengths[2] = 3; 26*c4762a1bSJed Brown /* Curse the evil beings that made std::complex a non-POD type. */ 27*c4762a1bSJed Brown displs[0] = (char*)&dummy.rank - (char*)&dummy; /* offsetof(Unit,rank); */ 28*c4762a1bSJed Brown displs[1] = (char*)&dummy.value - (char*)&dummy; /* offsetof(Unit,value); */ 29*c4762a1bSJed Brown displs[2] = (char*)&dummy.ok - (char*)&dummy; /* offsetof(Unit,ok); */ 30*c4762a1bSJed Brown ierr = MPI_Type_create_struct(3,lengths,displs,dtypes,&tmptype);CHKERRQ(ierr); 31*c4762a1bSJed Brown ierr = MPI_Type_commit(&tmptype);CHKERRQ(ierr); 32*c4762a1bSJed Brown ierr = MPI_Type_create_resized(tmptype,0,sizeof(Unit),dtype);CHKERRQ(ierr); 33*c4762a1bSJed Brown ierr = MPI_Type_commit(dtype);CHKERRQ(ierr); 34*c4762a1bSJed Brown ierr = MPI_Type_free(&tmptype);CHKERRQ(ierr); 35*c4762a1bSJed Brown { 36*c4762a1bSJed Brown MPI_Aint lb,extent; 37*c4762a1bSJed Brown ierr = MPI_Type_get_extent(*dtype,&lb,&extent);CHKERRQ(ierr); 38*c4762a1bSJed Brown if (extent != sizeof(Unit)) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_LIB,"New type has extent %d != sizeof(Unit) %d",extent,(int)sizeof(Unit)); 39*c4762a1bSJed Brown } 40*c4762a1bSJed Brown PetscFunctionReturn(0); 41*c4762a1bSJed Brown } 42*c4762a1bSJed Brown 43*c4762a1bSJed Brown struct FCtx { 44*c4762a1bSJed Brown PetscMPIInt rank; 45*c4762a1bSJed Brown PetscMPIInt nto; 46*c4762a1bSJed Brown PetscMPIInt *toranks; 47*c4762a1bSJed Brown Unit *todata; 48*c4762a1bSJed Brown PetscSegBuffer seg; 49*c4762a1bSJed Brown }; 50*c4762a1bSJed Brown 51*c4762a1bSJed Brown static PetscErrorCode FSend(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt tonum,PetscMPIInt rank,void *todata,MPI_Request req[],void *ctx) 52*c4762a1bSJed Brown { 53*c4762a1bSJed Brown struct FCtx *fctx = (struct FCtx*)ctx; 54*c4762a1bSJed Brown PetscErrorCode ierr; 55*c4762a1bSJed Brown 56*c4762a1bSJed Brown PetscFunctionBegin; 57*c4762a1bSJed Brown if (rank != fctx->toranks[tonum]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Rank %d does not match toranks[%d] %d",rank,tonum,fctx->toranks[tonum]); 58*c4762a1bSJed Brown if (fctx->rank != *(PetscMPIInt*)todata) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Todata %d does not match rank %d",*(PetscMPIInt*)todata,fctx->rank); 59*c4762a1bSJed Brown ierr = MPI_Isend(&fctx->todata[tonum].rank,1,MPIU_INT,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 60*c4762a1bSJed Brown ierr = MPI_Isend(&fctx->todata[tonum].value,1,MPIU_SCALAR,rank,tag[1],comm,&req[1]);CHKERRQ(ierr); 61*c4762a1bSJed Brown PetscFunctionReturn(0); 62*c4762a1bSJed Brown } 63*c4762a1bSJed Brown 64*c4762a1bSJed Brown static PetscErrorCode FRecv(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *fromdata,MPI_Request req[],void *ctx) 65*c4762a1bSJed Brown { 66*c4762a1bSJed Brown struct FCtx *fctx = (struct FCtx*)ctx; 67*c4762a1bSJed Brown PetscErrorCode ierr; 68*c4762a1bSJed Brown Unit *buf; 69*c4762a1bSJed Brown 70*c4762a1bSJed Brown PetscFunctionBegin; 71*c4762a1bSJed Brown if (*(PetscMPIInt*)fromdata != rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Dummy data %d from rank %d corrupt",*(PetscMPIInt*)fromdata,rank); 72*c4762a1bSJed Brown ierr = PetscSegBufferGet(fctx->seg,1,&buf);CHKERRQ(ierr); 73*c4762a1bSJed Brown ierr = MPI_Irecv(&buf->rank,1,MPIU_INT,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 74*c4762a1bSJed Brown ierr = MPI_Irecv(&buf->value,1,MPIU_SCALAR,rank,tag[1],comm,&req[1]);CHKERRQ(ierr); 75*c4762a1bSJed Brown buf->ok[0] = 'o'; 76*c4762a1bSJed Brown buf->ok[1] = 'k'; 77*c4762a1bSJed Brown buf->ok[2] = 0; 78*c4762a1bSJed Brown PetscFunctionReturn(0); 79*c4762a1bSJed Brown } 80*c4762a1bSJed Brown 81*c4762a1bSJed Brown int main(int argc,char **argv) 82*c4762a1bSJed Brown { 83*c4762a1bSJed Brown PetscErrorCode ierr; 84*c4762a1bSJed Brown PetscMPIInt rank,size,*toranks,*fromranks,nto,nfrom; 85*c4762a1bSJed Brown PetscInt i,n; 86*c4762a1bSJed Brown PetscBool verbose,build_twosided_f; 87*c4762a1bSJed Brown Unit *todata,*fromdata; 88*c4762a1bSJed Brown MPI_Datatype dtype; 89*c4762a1bSJed Brown 90*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 91*c4762a1bSJed Brown ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 92*c4762a1bSJed Brown ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 93*c4762a1bSJed Brown 94*c4762a1bSJed Brown verbose = PETSC_FALSE; 95*c4762a1bSJed Brown ierr = PetscOptionsGetBool(NULL,NULL,"-verbose",&verbose,NULL);CHKERRQ(ierr); 96*c4762a1bSJed Brown build_twosided_f = PETSC_FALSE; 97*c4762a1bSJed Brown ierr = PetscOptionsGetBool(NULL,NULL,"-build_twosided_f",&build_twosided_f,NULL);CHKERRQ(ierr); 98*c4762a1bSJed Brown 99*c4762a1bSJed Brown for (i=1,nto=0; i<size; i*=2) nto++; 100*c4762a1bSJed Brown ierr = PetscMalloc2(nto,&todata,nto,&toranks);CHKERRQ(ierr); 101*c4762a1bSJed Brown for (n=0,i=1; i<size; n++,i*=2) { 102*c4762a1bSJed Brown toranks[n] = (rank+i) % size; 103*c4762a1bSJed Brown todata[n].rank = (rank+i) % size; 104*c4762a1bSJed Brown todata[n].value = (PetscScalar)rank; 105*c4762a1bSJed Brown todata[n].ok[0] = 'o'; 106*c4762a1bSJed Brown todata[n].ok[1] = 'k'; 107*c4762a1bSJed Brown todata[n].ok[2] = 0; 108*c4762a1bSJed Brown } 109*c4762a1bSJed Brown if (verbose) { 110*c4762a1bSJed Brown for (i=0; i<nto; i++) { 111*c4762a1bSJed Brown ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] TO %d: {%D, %g, \"%s\"}\n",rank,toranks[i],todata[i].rank,(double)PetscRealPart(todata[i].value),todata[i].ok);CHKERRQ(ierr); 112*c4762a1bSJed Brown } 113*c4762a1bSJed Brown ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 114*c4762a1bSJed Brown } 115*c4762a1bSJed Brown 116*c4762a1bSJed Brown ierr = MakeDatatype(&dtype);CHKERRQ(ierr); 117*c4762a1bSJed Brown 118*c4762a1bSJed Brown if (build_twosided_f) { 119*c4762a1bSJed Brown struct FCtx fctx; 120*c4762a1bSJed Brown PetscMPIInt *todummy,*fromdummy; 121*c4762a1bSJed Brown fctx.rank = rank; 122*c4762a1bSJed Brown fctx.nto = nto; 123*c4762a1bSJed Brown fctx.toranks = toranks; 124*c4762a1bSJed Brown fctx.todata = todata; 125*c4762a1bSJed Brown ierr = PetscSegBufferCreate(sizeof(Unit),1,&fctx.seg);CHKERRQ(ierr); 126*c4762a1bSJed Brown ierr = PetscMalloc1(nto,&todummy);CHKERRQ(ierr); 127*c4762a1bSJed Brown for (i=0; i<nto; i++) todummy[i] = rank; 128*c4762a1bSJed Brown ierr = PetscCommBuildTwoSidedF(PETSC_COMM_WORLD,1,MPI_INT,nto,toranks,todummy,&nfrom,&fromranks,&fromdummy,2,FSend,FRecv,&fctx);CHKERRQ(ierr); 129*c4762a1bSJed Brown ierr = PetscFree(todummy);CHKERRQ(ierr); 130*c4762a1bSJed Brown ierr = PetscFree(fromdummy);CHKERRQ(ierr); 131*c4762a1bSJed Brown ierr = PetscSegBufferExtractAlloc(fctx.seg,&fromdata);CHKERRQ(ierr); 132*c4762a1bSJed Brown ierr = PetscSegBufferDestroy(&fctx.seg);CHKERRQ(ierr); 133*c4762a1bSJed Brown } else { 134*c4762a1bSJed Brown ierr = PetscCommBuildTwoSided(PETSC_COMM_WORLD,1,dtype,nto,toranks,todata,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr); 135*c4762a1bSJed Brown } 136*c4762a1bSJed Brown ierr = MPI_Type_free(&dtype);CHKERRQ(ierr); 137*c4762a1bSJed Brown 138*c4762a1bSJed Brown if (verbose) { 139*c4762a1bSJed Brown PetscInt *iranks,*iperm; 140*c4762a1bSJed Brown ierr = PetscMalloc2(nfrom,&iranks,nfrom,&iperm);CHKERRQ(ierr); 141*c4762a1bSJed Brown for (i=0; i<nfrom; i++) { 142*c4762a1bSJed Brown iranks[i] = fromranks[i]; 143*c4762a1bSJed Brown iperm[i] = i; 144*c4762a1bSJed Brown } 145*c4762a1bSJed Brown /* Receive ordering is non-deterministic in general, so sort to make verbose output deterministic. */ 146*c4762a1bSJed Brown ierr = PetscSortIntWithPermutation(nfrom,iranks,iperm);CHKERRQ(ierr); 147*c4762a1bSJed Brown for (i=0; i<nfrom; i++) { 148*c4762a1bSJed Brown PetscInt ip = iperm[i]; 149*c4762a1bSJed Brown ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] FROM %d: {%D, %g, \"%s\"}\n",rank,fromranks[ip],fromdata[ip].rank,(double)PetscRealPart(fromdata[ip].value),fromdata[ip].ok);CHKERRQ(ierr); 150*c4762a1bSJed Brown } 151*c4762a1bSJed Brown ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 152*c4762a1bSJed Brown ierr = PetscFree2(iranks,iperm);CHKERRQ(ierr); 153*c4762a1bSJed Brown } 154*c4762a1bSJed Brown 155*c4762a1bSJed Brown if (nto != nfrom) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] From ranks %d does not match To ranks %d",rank,nto,nfrom); 156*c4762a1bSJed Brown for (i=1; i<size; i*=2) { 157*c4762a1bSJed Brown PetscMPIInt expected_rank = (rank-i+size)%size; 158*c4762a1bSJed Brown PetscBool flg; 159*c4762a1bSJed Brown for (n=0; n<nfrom; n++) { 160*c4762a1bSJed Brown if (expected_rank == fromranks[n]) goto found; 161*c4762a1bSJed Brown } 162*c4762a1bSJed Brown SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"[%d] Could not find expected from rank %d",rank,expected_rank); 163*c4762a1bSJed Brown found: 164*c4762a1bSJed Brown if (PetscRealPart(fromdata[n].value) != expected_rank) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got data %g from rank %d",rank,(double)PetscRealPart(fromdata[n].value),expected_rank); 165*c4762a1bSJed Brown ierr = PetscStrcmp(fromdata[n].ok,"ok",&flg);CHKERRQ(ierr); 166*c4762a1bSJed Brown if (!flg) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got string %s from rank %d",rank,fromdata[n].ok,expected_rank); 167*c4762a1bSJed Brown } 168*c4762a1bSJed Brown ierr = PetscFree2(todata,toranks);CHKERRQ(ierr); 169*c4762a1bSJed Brown ierr = PetscFree(fromdata);CHKERRQ(ierr); 170*c4762a1bSJed Brown ierr = PetscFree(fromranks);CHKERRQ(ierr); 171*c4762a1bSJed Brown ierr = PetscFinalize(); 172*c4762a1bSJed Brown return ierr; 173*c4762a1bSJed Brown } 174*c4762a1bSJed Brown 175*c4762a1bSJed Brown 176*c4762a1bSJed Brown 177*c4762a1bSJed Brown /*TEST 178*c4762a1bSJed Brown 179*c4762a1bSJed Brown test: 180*c4762a1bSJed Brown nsize: 4 181*c4762a1bSJed Brown args: -verbose -build_twosided allreduce 182*c4762a1bSJed Brown 183*c4762a1bSJed Brown test: 184*c4762a1bSJed Brown suffix: f 185*c4762a1bSJed Brown nsize: 4 186*c4762a1bSJed Brown args: -verbose -build_twosided_f -build_twosided allreduce 187*c4762a1bSJed Brown output_file: output/ex8_1.out 188*c4762a1bSJed Brown 189*c4762a1bSJed Brown test: 190*c4762a1bSJed Brown suffix: f_ibarrier 191*c4762a1bSJed Brown nsize: 4 192*c4762a1bSJed Brown args: -verbose -build_twosided_f -build_twosided ibarrier 193*c4762a1bSJed Brown output_file: output/ex8_1.out 194*c4762a1bSJed Brown 195*c4762a1bSJed Brown test: 196*c4762a1bSJed Brown suffix: ibarrier 197*c4762a1bSJed Brown nsize: 4 198*c4762a1bSJed Brown args: -verbose -build_twosided ibarrier 199*c4762a1bSJed Brown output_file: output/ex8_1.out 200*c4762a1bSJed Brown 201*c4762a1bSJed Brown test: 202*c4762a1bSJed Brown suffix: redscatter 203*c4762a1bSJed Brown requires: mpi_reduce_scatter_block 204*c4762a1bSJed Brown nsize: 4 205*c4762a1bSJed Brown args: -verbose -build_twosided redscatter 206*c4762a1bSJed Brown output_file: output/ex8_1.out 207*c4762a1bSJed Brown 208*c4762a1bSJed Brown TEST*/ 209