197929ea7SJunchao Zhang static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\ 297929ea7SJunchao Zhang 2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\ 38a53a0a4SSajid Ali required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\ 48a53a0a4SSajid Ali To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n"; 597929ea7SJunchao Zhang 697929ea7SJunchao Zhang #include <petscvec.h> 797929ea7SJunchao Zhang int main(int argc,char **argv) 897929ea7SJunchao Zhang { 997929ea7SJunchao Zhang PetscMPIInt nproc,grank,mycolor; 1097929ea7SJunchao Zhang PetscInt i,n,N = 20,low,high; 1197929ea7SJunchao Zhang MPI_Comm subcomm; 128a53a0a4SSajid Ali Vec x = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */ 138a53a0a4SSajid Ali Vec yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */ 1497929ea7SJunchao Zhang VecScatter vscat; 1597929ea7SJunchao Zhang IS ix,iy; 168a53a0a4SSajid Ali PetscBool iscuda = PETSC_FALSE; /* Option to use VECCUDA vectors */ 178a53a0a4SSajid Ali PetscBool optionflag, compareflag; 188a53a0a4SSajid Ali char vectypename[PETSC_MAX_PATH_LEN]; 1997929ea7SJunchao Zhang PetscBool world2sub = PETSC_FALSE; /* Copy a vector from WORLD to a subcomm? */ 2097929ea7SJunchao Zhang PetscBool sub2sub = PETSC_FALSE; /* Copy a vector from a subcomm to another subcomm? */ 2197929ea7SJunchao Zhang PetscBool world2subs = PETSC_FALSE; /* Copy a vector from WORLD to multiple subcomms? */ 2297929ea7SJunchao Zhang 239566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc,&argv,(char*)0,help)); 249566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc)); 259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&grank)); 2697929ea7SJunchao Zhang 27*08401ef6SPierre Jolivet PetscCheck(nproc >= 2,PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"This test must have at least two processes to run"); 2897929ea7SJunchao Zhang 299566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL,0,"-world2sub",&world2sub,NULL)); 309566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL,0,"-sub2sub",&sub2sub,NULL)); 319566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL,0,"-world2subs",&world2subs,NULL)); 329566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag)); 338a53a0a4SSajid Ali if (optionflag) { 349566063dSJacob Faibussowitsch PetscCall(PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag)); 358a53a0a4SSajid Ali if (compareflag) iscuda = PETSC_TRUE; 368a53a0a4SSajid Ali } 3797929ea7SJunchao Zhang 3897929ea7SJunchao Zhang /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */ 3997929ea7SJunchao Zhang mycolor = grank % 3; 409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm)); 4197929ea7SJunchao Zhang 4297929ea7SJunchao Zhang /*=========================================================================== 4397929ea7SJunchao Zhang * Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on 4497929ea7SJunchao Zhang * a subcommunicator of PETSC_COMM_WORLD and vice versa. 4597929ea7SJunchao Zhang *===========================================================================*/ 4697929ea7SJunchao Zhang if (world2sub) { 479566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &x)); 489566063dSJacob Faibussowitsch PetscCall(VecSetSizes(x, PETSC_DECIDE, N)); 498a53a0a4SSajid Ali if (iscuda) { 509566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECCUDA)); 518a53a0a4SSajid Ali } else { 529566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECSTANDARD)); 538a53a0a4SSajid Ali } 549566063dSJacob Faibussowitsch PetscCall(VecSetUp(x)); 559566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)x,"x_commworld")); /* Give a name to view x clearly */ 5697929ea7SJunchao Zhang 5797929ea7SJunchao Zhang /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */ 589566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x,&low,&high)); 5997929ea7SJunchao Zhang for (i=low; i<high; i++) { 6097929ea7SJunchao Zhang PetscScalar val = -i; 619566063dSJacob Faibussowitsch PetscCall(VecSetValue(x,i,val,INSERT_VALUES)); 6297929ea7SJunchao Zhang } 639566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 649566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 6597929ea7SJunchao Zhang 6697929ea7SJunchao Zhang /* Transfer x to a vector y only defined on subcomm0 and vice versa */ 6797929ea7SJunchao Zhang if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */ 6897929ea7SJunchao Zhang Vec y; 6997929ea7SJunchao Zhang PetscScalar *yvalue; 709566063dSJacob Faibussowitsch PetscCall(VecCreate(subcomm, &y)); 719566063dSJacob Faibussowitsch PetscCall(VecSetSizes(y, PETSC_DECIDE, N)); 728a53a0a4SSajid Ali if (iscuda) { 739566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECCUDA)); 748a53a0a4SSajid Ali } else { 759566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECSTANDARD)); 768a53a0a4SSajid Ali } 779566063dSJacob Faibussowitsch PetscCall(VecSetUp(y)); 789566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)y,"y_subcomm_0")); /* Give a name to view y clearly */ 799566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(y,&n)); 808a53a0a4SSajid Ali if (iscuda) { 818a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 829566063dSJacob Faibussowitsch PetscCall(VecCUDAGetArray(y,&yvalue)); 838a53a0a4SSajid Ali #endif 848a53a0a4SSajid Ali } else { 859566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&yvalue)); 868a53a0a4SSajid Ali } 8797929ea7SJunchao Zhang /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed by yvalue. 8897929ea7SJunchao Zhang Note this is a collective call. All processes have to call it and supply consistent N. 8997929ea7SJunchao Zhang */ 908a53a0a4SSajid Ali if (iscuda) { 918a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 929566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg)); 938a53a0a4SSajid Ali #endif 948a53a0a4SSajid Ali } else { 959566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg)); 968a53a0a4SSajid Ali } 9797929ea7SJunchao Zhang 9897929ea7SJunchao Zhang /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */ 999566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(yg,&low,&high)); /* low, high are global indices */ 1009566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix)); 1019566063dSJacob Faibussowitsch PetscCall(ISDuplicate(ix,&iy)); 10297929ea7SJunchao Zhang 10397929ea7SJunchao Zhang /* Union of ix's on subcomm0 covers the full range of [0,N) */ 1049566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat)); 1059566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD)); 1069566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD)); 10797929ea7SJunchao Zhang 10897929ea7SJunchao Zhang /* Once yg got the data from x, we return yvalue to y so that we can use y in other operations. 10997929ea7SJunchao Zhang VecGetArray must be paired with VecRestoreArray. 11097929ea7SJunchao Zhang */ 1118a53a0a4SSajid Ali if (iscuda) { 1128a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 1139566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArray(y,&yvalue)); 1148a53a0a4SSajid Ali #endif 1158a53a0a4SSajid Ali } else { 1169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&yvalue)); 1178a53a0a4SSajid Ali } 11897929ea7SJunchao Zhang 11997929ea7SJunchao Zhang /* Libraries on subcomm0 can safely use y now, for example, view and scale it */ 1209566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_(subcomm))); 1219566063dSJacob Faibussowitsch PetscCall(VecScale(y,2.0)); 12297929ea7SJunchao Zhang 12397929ea7SJunchao Zhang /* Send the new y back to x */ 1249566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&yvalue)); /* If VecScale is done on GPU, Petsc will prepare a valid yvalue for access */ 12597929ea7SJunchao Zhang /* Supply new yvalue to yg without memory copying */ 1269566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(yg,yvalue)); 1279566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE)); 1289566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE)); 1299566063dSJacob Faibussowitsch PetscCall(VecResetArray(yg)); 1308a53a0a4SSajid Ali if (iscuda) { 1318a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 1329566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArray(y,&yvalue)); 1338a53a0a4SSajid Ali #endif 1348a53a0a4SSajid Ali } else { 1359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&yvalue)); 1368a53a0a4SSajid Ali } 13797929ea7SJunchao Zhang 1389566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 13997929ea7SJunchao Zhang } else { 14097929ea7SJunchao Zhang /* Ranks outside of subcomm0 do not supply values to yg */ 1418a53a0a4SSajid Ali if (iscuda) { 1428a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 1439566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg)); 1448a53a0a4SSajid Ali #endif 1458a53a0a4SSajid Ali } else { 1469566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg)); 1478a53a0a4SSajid Ali } 14897929ea7SJunchao Zhang 14997929ea7SJunchao Zhang /* Ranks in subcomm0 already specified the full range of the identity map. The remaining 15097929ea7SJunchao Zhang ranks just need to create empty ISes to cheat VecScatterCreate. 15197929ea7SJunchao Zhang */ 1529566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix)); 1539566063dSJacob Faibussowitsch PetscCall(ISDuplicate(ix,&iy)); 15497929ea7SJunchao Zhang 1559566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat)); 1569566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD)); 1579566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD)); 15897929ea7SJunchao Zhang 15997929ea7SJunchao Zhang /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send. 16097929ea7SJunchao Zhang But they have to call VecScatterBegin/End since these routines are collective. 16197929ea7SJunchao Zhang */ 1629566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE)); 1639566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE)); 16497929ea7SJunchao Zhang } 16597929ea7SJunchao Zhang 1669566063dSJacob Faibussowitsch PetscCall(VecView(x,PETSC_VIEWER_STDOUT_WORLD)); 1679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&ix)); 1689566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iy)); 1699566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 1709566063dSJacob Faibussowitsch PetscCall(VecDestroy(&yg)); 1719566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 17297929ea7SJunchao Zhang } /* world2sub */ 17397929ea7SJunchao Zhang 17497929ea7SJunchao Zhang /*=========================================================================== 17597929ea7SJunchao Zhang * Transfer a vector x defined on subcomm0 to a vector y defined on 17697929ea7SJunchao Zhang * subcomm1. The two subcomms are not overlapping and their union is 17797929ea7SJunchao Zhang * not necessarily equal to PETSC_COMM_WORLD. 17897929ea7SJunchao Zhang *===========================================================================*/ 17997929ea7SJunchao Zhang if (sub2sub) { 18097929ea7SJunchao Zhang if (mycolor == 0) { 18197929ea7SJunchao Zhang /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */ 18297929ea7SJunchao Zhang PetscInt n,N = 22; 18397929ea7SJunchao Zhang Vec x,xg,yg; 18497929ea7SJunchao Zhang IS ix,iy; 18597929ea7SJunchao Zhang VecScatter vscat; 18697929ea7SJunchao Zhang const PetscScalar *xvalue; 18797929ea7SJunchao Zhang MPI_Comm intercomm,parentcomm; 18897929ea7SJunchao Zhang PetscMPIInt lrank; 18997929ea7SJunchao Zhang 1909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subcomm,&lrank)); 1918a53a0a4SSajid Ali /* x is on subcomm */ 1929566063dSJacob Faibussowitsch PetscCall(VecCreate(subcomm, &x)); 1939566063dSJacob Faibussowitsch PetscCall(VecSetSizes(x, PETSC_DECIDE, N)); 1948a53a0a4SSajid Ali if (iscuda) { 1959566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECCUDA)); 1968a53a0a4SSajid Ali } else { 1979566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECSTANDARD)); 1988a53a0a4SSajid Ali } 1999566063dSJacob Faibussowitsch PetscCall(VecSetUp(x)); 2009566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x,&low,&high)); 20197929ea7SJunchao Zhang 20297929ea7SJunchao Zhang /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */ 20397929ea7SJunchao Zhang for (i=low; i<high; i++) { 20497929ea7SJunchao Zhang PetscScalar val = i; 2059566063dSJacob Faibussowitsch PetscCall(VecSetValue(x,i,val,INSERT_VALUES)); 20697929ea7SJunchao Zhang } 2079566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 2089566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 20997929ea7SJunchao Zhang 2109566063dSJacob Faibussowitsch PetscCallMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm)); 21197929ea7SJunchao Zhang 21297929ea7SJunchao Zhang /* Tell rank 0 of subcomm1 the global size of x */ 2139566063dSJacob Faibussowitsch if (!lrank) PetscCallMPI(MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm)); 21497929ea7SJunchao Zhang 21597929ea7SJunchao Zhang /* Create an intracomm Petsc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm. 21697929ea7SJunchao Zhang But this order actually does not matter, since what we care is vector y, which is defined on subcomm1. 21797929ea7SJunchao Zhang */ 2189566063dSJacob Faibussowitsch PetscCallMPI(MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm)); 21997929ea7SJunchao Zhang 22097929ea7SJunchao Zhang /* Create a vector xg on parentcomm, which shares memory with x */ 2219566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(x,&n)); 2228a53a0a4SSajid Ali if (iscuda) { 2238a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 2249566063dSJacob Faibussowitsch PetscCall(VecCUDAGetArrayRead(x,&xvalue)); 2259566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg)); 2268a53a0a4SSajid Ali #endif 2278a53a0a4SSajid Ali } else { 2289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xvalue)); 2299566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg)); 2308a53a0a4SSajid Ali } 23197929ea7SJunchao Zhang 23297929ea7SJunchao Zhang /* Ranks in subcomm 0 have nothing on yg, so they simply have n=0, array=NULL */ 2338a53a0a4SSajid Ali if (iscuda) { 2348a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 2359566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg)); 2368a53a0a4SSajid Ali #endif 2378a53a0a4SSajid Ali } else { 2389566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg)); 2398a53a0a4SSajid Ali } 24097929ea7SJunchao Zhang 24197929ea7SJunchao Zhang /* Create the vecscatter, which does identity map by setting yg[i] = xg[i], i=0..N-1. */ 2429566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(xg,&low,&high)); /* low, high are global indices of xg */ 2439566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix)); 2449566063dSJacob Faibussowitsch PetscCall(ISDuplicate(ix,&iy)); 2459566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(xg,ix,yg,iy,&vscat)); 24697929ea7SJunchao Zhang 24797929ea7SJunchao Zhang /* Scatter values from xg to yg */ 2489566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD)); 2499566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD)); 25097929ea7SJunchao Zhang 25197929ea7SJunchao Zhang /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */ 2528a53a0a4SSajid Ali if (iscuda) { 2538a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 2549566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArrayRead(x,&xvalue)); 2558a53a0a4SSajid Ali #endif 2568a53a0a4SSajid Ali } else { 2579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xvalue)); 2588a53a0a4SSajid Ali } 2599566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 2609566063dSJacob Faibussowitsch PetscCall(ISDestroy(&ix)); 2619566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iy)); 2629566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xg)); 2639566063dSJacob Faibussowitsch PetscCall(VecDestroy(&yg)); 2649566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 2659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&intercomm)); 2669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&parentcomm)); 26797929ea7SJunchao Zhang } else if (mycolor == 1) { /* subcomm 1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */ 26897929ea7SJunchao Zhang PetscInt n,N; 26997929ea7SJunchao Zhang Vec y,xg,yg; 27097929ea7SJunchao Zhang IS ix,iy; 27197929ea7SJunchao Zhang VecScatter vscat; 27297929ea7SJunchao Zhang PetscScalar *yvalue; 27397929ea7SJunchao Zhang MPI_Comm intercomm,parentcomm; 27497929ea7SJunchao Zhang PetscMPIInt lrank; 27597929ea7SJunchao Zhang 2769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subcomm,&lrank)); 2779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm)); 27897929ea7SJunchao Zhang 27997929ea7SJunchao Zhang /* Two rank-0 are talking */ 2809566063dSJacob Faibussowitsch if (!lrank) PetscCallMPI(MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE)); 28197929ea7SJunchao Zhang /* Rank 0 of subcomm1 bcasts N to its members */ 2829566063dSJacob Faibussowitsch PetscCallMPI(MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm)); 28397929ea7SJunchao Zhang 28497929ea7SJunchao Zhang /* Create a intracomm Petsc can work on */ 2859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm)); 28697929ea7SJunchao Zhang 28797929ea7SJunchao Zhang /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/ 2888a53a0a4SSajid Ali if (iscuda) { 2898a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 2909566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg)); 2918a53a0a4SSajid Ali #endif 2928a53a0a4SSajid Ali } else { 2939566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg)); 2948a53a0a4SSajid Ali } 29597929ea7SJunchao Zhang 2969566063dSJacob Faibussowitsch PetscCall(VecCreate(subcomm, &y)); 2979566063dSJacob Faibussowitsch PetscCall(VecSetSizes(y, PETSC_DECIDE, N)); 2988a53a0a4SSajid Ali if (iscuda) { 2999566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECCUDA)); 3008a53a0a4SSajid Ali } else { 3019566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECSTANDARD)); 3028a53a0a4SSajid Ali } 3039566063dSJacob Faibussowitsch PetscCall(VecSetUp(y)); 3048a53a0a4SSajid Ali 3059566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)y,"y_subcomm_1")); /* Give a name to view y clearly */ 3069566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(y,&n)); 3078a53a0a4SSajid Ali if (iscuda) { 3088a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 3099566063dSJacob Faibussowitsch PetscCall(VecCUDAGetArray(y,&yvalue)); 3108a53a0a4SSajid Ali #endif 3118a53a0a4SSajid Ali } else { 3129566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&yvalue)); 3138a53a0a4SSajid Ali } 31497929ea7SJunchao Zhang /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be 31597929ea7SJunchao Zhang created in the same order in subcomm0/1. For example, we can not reverse the order of 31697929ea7SJunchao Zhang creating xg and yg in subcomm1. 31797929ea7SJunchao Zhang */ 3188a53a0a4SSajid Ali if (iscuda) { 3198a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 3209566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg)); 3218a53a0a4SSajid Ali #endif 3228a53a0a4SSajid Ali } else { 3239566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg)); 3248a53a0a4SSajid Ali } 32597929ea7SJunchao Zhang 32697929ea7SJunchao Zhang /* Ranks in subcomm0 already specified the full range of the identity map. 32797929ea7SJunchao Zhang ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate. 32897929ea7SJunchao Zhang */ 3299566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix)); 3309566063dSJacob Faibussowitsch PetscCall(ISDuplicate(ix,&iy)); 3319566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(xg,ix,yg,iy,&vscat)); 33297929ea7SJunchao Zhang 33397929ea7SJunchao Zhang /* Scatter values from xg to yg */ 3349566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD)); 3359566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD)); 33697929ea7SJunchao Zhang 33797929ea7SJunchao Zhang /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */ 3388a53a0a4SSajid Ali if (iscuda) { 3398a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 3409566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArray(y,&yvalue)); 3418a53a0a4SSajid Ali #endif 3428a53a0a4SSajid Ali } else { 3439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&yvalue)); 3448a53a0a4SSajid Ali } 34597929ea7SJunchao Zhang 34697929ea7SJunchao Zhang /* Libraries on subcomm1 can safely use y now, for example, view it */ 3479566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_(subcomm))); 34897929ea7SJunchao Zhang 3499566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 3509566063dSJacob Faibussowitsch PetscCall(ISDestroy(&ix)); 3519566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iy)); 3529566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xg)); 3539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&yg)); 3549566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 3559566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&intercomm)); 3569566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&parentcomm)); 35797929ea7SJunchao Zhang } else if (mycolor == 2) { /* subcomm2 */ 35897929ea7SJunchao Zhang /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */ 35997929ea7SJunchao Zhang } 36097929ea7SJunchao Zhang } /* sub2sub */ 36197929ea7SJunchao Zhang 36297929ea7SJunchao Zhang /*=========================================================================== 36397929ea7SJunchao Zhang * Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on 36497929ea7SJunchao Zhang * every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers 36597929ea7SJunchao Zhang * as we did in case 1, but that is not efficient. Instead, we use one vecscatter 36697929ea7SJunchao Zhang * to achieve that. 36797929ea7SJunchao Zhang *===========================================================================*/ 36897929ea7SJunchao Zhang if (world2subs) { 3698a53a0a4SSajid Ali Vec y; 37097929ea7SJunchao Zhang PetscInt n,N=15,xstart,ystart,low,high; 37197929ea7SJunchao Zhang PetscScalar *yvalue; 37297929ea7SJunchao Zhang 37397929ea7SJunchao Zhang /* Initialize x to [0, 1, 2, 3, ..., N-1] */ 3749566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &x)); 3759566063dSJacob Faibussowitsch PetscCall(VecSetSizes(x, PETSC_DECIDE, N)); 3768a53a0a4SSajid Ali if (iscuda) { 3779566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECCUDA)); 3788a53a0a4SSajid Ali } else { 3799566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECSTANDARD)); 3808a53a0a4SSajid Ali } 3819566063dSJacob Faibussowitsch PetscCall(VecSetUp(x)); 3829566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x,&low,&high)); 3839566063dSJacob Faibussowitsch for (i=low; i<high; i++) PetscCall(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES)); 3849566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 3859566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 38697929ea7SJunchao Zhang 38797929ea7SJunchao Zhang /* Every subcomm has a y as long as x */ 3889566063dSJacob Faibussowitsch PetscCall(VecCreate(subcomm, &y)); 3899566063dSJacob Faibussowitsch PetscCall(VecSetSizes(y, PETSC_DECIDE, N)); 3908a53a0a4SSajid Ali if (iscuda) { 3919566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECCUDA)); 3928a53a0a4SSajid Ali } else { 3939566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECSTANDARD)); 3948a53a0a4SSajid Ali } 3959566063dSJacob Faibussowitsch PetscCall(VecSetUp(y)); 3969566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(y,&n)); 39797929ea7SJunchao Zhang 39897929ea7SJunchao Zhang /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators). 39997929ea7SJunchao Zhang Eeach rank in subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not 40097929ea7SJunchao Zhang necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of rank 40197929ea7SJunchao Zhang 0, 3, 6 etc from PETSC_COMM_WORLD. So subcomm0's pieces are interleaved with pieces from other subcomms in yg. 40297929ea7SJunchao Zhang */ 4038a53a0a4SSajid Ali if (iscuda) { 4048a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 4059566063dSJacob Faibussowitsch PetscCall(VecCUDAGetArray(y,&yvalue)); 4069566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg)); 4078a53a0a4SSajid Ali #endif 4088a53a0a4SSajid Ali } else { 4099566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&yvalue)); 4109566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg)); 4118a53a0a4SSajid Ali } 4129566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)yg,"yg_on_subcomms")); /* Give a name to view yg clearly */ 41397929ea7SJunchao Zhang 41497929ea7SJunchao Zhang /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y, 41597929ea7SJunchao Zhang since first entry of y on this rank is from x[xstart]. From ystart, we know where ot put entries to yg. 41697929ea7SJunchao Zhang */ 4179566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y,&xstart,NULL)); 4189566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(yg,&ystart,NULL)); 41997929ea7SJunchao Zhang 4209566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix)); 4219566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy)); 4229566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat)); 4239566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD)); 4249566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD)); 42597929ea7SJunchao Zhang 42697929ea7SJunchao Zhang /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in output. */ 4279566063dSJacob Faibussowitsch PetscCall(VecView(yg,PETSC_VIEWER_STDOUT_WORLD)); 4289566063dSJacob Faibussowitsch PetscCall(VecDestroy(&yg)); 42997929ea7SJunchao Zhang 43097929ea7SJunchao Zhang /* Restory yvalue so that processes in subcomm can use y from now on. */ 4318a53a0a4SSajid Ali if (iscuda) { 4328a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA) 4339566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArray(y,&yvalue)); 4348a53a0a4SSajid Ali #endif 4358a53a0a4SSajid Ali } else { 4369566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&yvalue)); 4378a53a0a4SSajid Ali } 4389566063dSJacob Faibussowitsch PetscCall(VecScale(y,3.0)); 43997929ea7SJunchao Zhang 4409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&ix)); /* One can also destroy ix, iy immediately after VecScatterCreate() */ 4419566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iy)); 4429566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 4439566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 4449566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 44597929ea7SJunchao Zhang } /* world2subs */ 44697929ea7SJunchao Zhang 4479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&subcomm)); 4489566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 449b122ec5aSJacob Faibussowitsch return 0; 45097929ea7SJunchao Zhang } 45197929ea7SJunchao Zhang 45297929ea7SJunchao Zhang /*TEST 45397929ea7SJunchao Zhang 45497929ea7SJunchao Zhang build: 455dfd57a17SPierre Jolivet requires: !defined(PETSC_HAVE_MPIUNI) 456c6c723bbSStefano Zampini 45797929ea7SJunchao Zhang testset: 45897929ea7SJunchao Zhang nsize: 7 45997929ea7SJunchao Zhang 46097929ea7SJunchao Zhang test: 46197929ea7SJunchao Zhang suffix: 1 46297929ea7SJunchao Zhang args: -world2sub 46397929ea7SJunchao Zhang 46497929ea7SJunchao Zhang test: 46597929ea7SJunchao Zhang suffix: 2 46697929ea7SJunchao Zhang args: -sub2sub 467f424265bSStefano Zampini # deadlocks with NECMPI and INTELMPI (20210400300) 468f424265bSStefano Zampini requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION) 46997929ea7SJunchao Zhang 47097929ea7SJunchao Zhang test: 47197929ea7SJunchao Zhang suffix: 3 47297929ea7SJunchao Zhang args: -world2subs 47397929ea7SJunchao Zhang 47497929ea7SJunchao Zhang test: 47597929ea7SJunchao Zhang suffix: 4 4768a53a0a4SSajid Ali args: -world2sub -vectype cuda 4778a53a0a4SSajid Ali requires: cuda 4788a53a0a4SSajid Ali 4798a53a0a4SSajid Ali test: 4808a53a0a4SSajid Ali suffix: 5 4818a53a0a4SSajid Ali args: -sub2sub -vectype cuda 4828a53a0a4SSajid Ali requires: cuda 4838a53a0a4SSajid Ali 4848a53a0a4SSajid Ali test: 4858a53a0a4SSajid Ali suffix: 6 4868a53a0a4SSajid Ali args: -world2subs -vectype cuda 4878a53a0a4SSajid Ali requires: cuda 4888a53a0a4SSajid Ali 4898a53a0a4SSajid Ali test: 4908a53a0a4SSajid Ali suffix: 7 49197929ea7SJunchao Zhang args: -world2sub -sf_type neighbor 49297929ea7SJunchao Zhang output_file: output/ex9_1.out 493c6c723bbSStefano Zampini # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !define(PETSC_HAVE_OMPI_MAJOR_VERSION) 494c6c723bbSStefano Zampini # segfaults with NECMPI 495c6c723bbSStefano Zampini requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI) 49697929ea7SJunchao Zhang TEST*/ 497