static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
  2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
  required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
  To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";

#include <petscvec.h>
int main(int argc,char **argv)
{
  PetscMPIInt nproc,grank,mycolor;
  PetscInt    i,n,N = 20,low,high;
  MPI_Comm    subcomm;
  Vec         x  = PETSC_NULL;          /* global vectors on PETSC_COMM_WORLD */
  Vec         yg = PETSC_NULL;          /* global vectors on PETSC_COMM_WORLD */
  VecScatter  vscat;
  IS          ix,iy;
  PetscBool   iscuda = PETSC_FALSE;     /* Option to use VECCUDA vectors */
  PetscBool   optionflag,compareflag;
  char        vectypename[PETSC_MAX_PATH_LEN];
  PetscBool   world2sub  = PETSC_FALSE; /* Copy a vector from WORLD to a subcomm? */
  PetscBool   sub2sub    = PETSC_FALSE; /* Copy a vector from a subcomm to another subcomm? */
  PetscBool   world2subs = PETSC_FALSE; /* Copy a vector from WORLD to multiple subcomms? */

  CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help));
  CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc));
  CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&grank));

  PetscCheckFalse(nproc < 2,PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"This test must have at least two processes to run");

  CHKERRQ(PetscOptionsGetBool(NULL,0,"-world2sub",&world2sub,NULL));
  CHKERRQ(PetscOptionsGetBool(NULL,0,"-sub2sub",&sub2sub,NULL));
  CHKERRQ(PetscOptionsGetBool(NULL,0,"-world2subs",&world2subs,NULL));
  CHKERRQ(PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag));
  if (optionflag) {
    CHKERRQ(PetscStrncmp(vectypename,"cuda",(size_t)4,&compareflag));
    if (compareflag) iscuda = PETSC_TRUE;
  }

  /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
  mycolor = grank % 3;
  CHKERRMPI(MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm));
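  /* For example, with the 7 MPI ranks used by the tests at the bottom of this file, the split above gives
     subcomm0 = {0,3,6}, subcomm1 = {1,4} and subcomm2 = {2,5} (listed by their ranks in PETSC_COMM_WORLD). */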

  /*===========================================================================
   *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
   *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
   *===========================================================================*/
  if (world2sub) {
    CHKERRQ(VecCreate(PETSC_COMM_WORLD, &x));
    CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
    if (iscuda) {
      CHKERRQ(VecSetType(x, VECCUDA));
    } else {
      CHKERRQ(VecSetType(x, VECSTANDARD));
    }
    CHKERRQ(VecSetUp(x));
    CHKERRQ(PetscObjectSetName((PetscObject)x,"x_commworld")); /* Give a name to view x clearly */

    /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
    CHKERRQ(VecGetOwnershipRange(x,&low,&high));
    for (i=low; i<high; i++) {
      PetscScalar val = -i;
      CHKERRQ(VecSetValue(x,i,val,INSERT_VALUES));
    }
    CHKERRQ(VecAssemblyBegin(x));
    CHKERRQ(VecAssemblyEnd(x));

    /* Transfer x to a vector y only defined on subcomm0 and vice versa */
    if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
      Vec         y;
      PetscScalar *yvalue;
      CHKERRQ(VecCreate(subcomm, &y));
      CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
      if (iscuda) {
        CHKERRQ(VecSetType(y, VECCUDA));
      } else {
        CHKERRQ(VecSetType(y, VECSTANDARD));
      }
      CHKERRQ(VecSetUp(y));
      CHKERRQ(PetscObjectSetName((PetscObject)y,"y_subcomm_0")); /* Give a name to view y clearly */
      CHKERRQ(VecGetLocalSize(y,&n));
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCUDAGetArray(y,&yvalue));
#endif
      } else {
        CHKERRQ(VecGetArray(y,&yvalue));
      }
      /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed to by yvalue.
         Note this is a collective call. All processes have to call it and supply a consistent N.
      */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
#endif
      } else {
        CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
      }

      /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
      CHKERRQ(VecGetOwnershipRange(yg,&low,&high)); /* low, high are global indices */
      CHKERRQ(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
      CHKERRQ(ISDuplicate(ix,&iy));

      /* Union of ix's on subcomm0 covers the full range of [0,N) */
      CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
      CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
      CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));

      /* Once yg has the data from x, we return yvalue to y so that we can use y in other operations.
         VecGetArray must be paired with VecRestoreArray.
      */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCUDARestoreArray(y,&yvalue));
#endif
      } else {
        CHKERRQ(VecRestoreArray(y,&yvalue));
      }

      /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
      CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
      CHKERRQ(VecScale(y,2.0));

      /* Send the new y back to x */
      CHKERRQ(VecGetArray(y,&yvalue)); /* If VecScale was done on the GPU, PETSc prepares a valid yvalue for host access */
      /* Supply the new yvalue to yg without copying memory */
      CHKERRQ(VecPlaceArray(yg,yvalue));
      CHKERRQ(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
      CHKERRQ(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
      CHKERRQ(VecResetArray(yg));
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCUDARestoreArray(y,&yvalue));
#endif
      } else {
        CHKERRQ(VecRestoreArray(y,&yvalue));
      }

      CHKERRQ(VecDestroy(&y));
    } else {
      /* Ranks outside of subcomm0 do not supply values to yg */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
#endif
      } else {
        CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
      }

      /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
         ranks just need to create empty ISes to cheat VecScatterCreate.
      */
      CHKERRQ(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
      CHKERRQ(ISDuplicate(ix,&iy));

      CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
      CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
      CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));

      /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
         But they have to call VecScatterBegin/End since these routines are collective.
      */
      CHKERRQ(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
      CHKERRQ(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
    }

    CHKERRQ(VecView(x,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(ISDestroy(&ix));
    CHKERRQ(ISDestroy(&iy));
    CHKERRQ(VecDestroy(&x));
    CHKERRQ(VecDestroy(&yg));
    CHKERRQ(VecScatterDestroy(&vscat));
  } /* world2sub */

  /*===========================================================================
   *  Transfer a vector x defined on subcomm0 to a vector y defined on
   *  subcomm1. The two subcomms are not overlapping and their union is
   *  not necessarily equal to PETSC_COMM_WORLD.
   *===========================================================================*/
  if (sub2sub) {
    if (mycolor == 0) {
      /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
      PetscInt          n,N = 22;
      Vec               x,xg,yg;
      IS                ix,iy;
      VecScatter        vscat;
      const PetscScalar *xvalue;
      MPI_Comm          intercomm,parentcomm;
      PetscMPIInt       lrank;

      CHKERRMPI(MPI_Comm_rank(subcomm,&lrank));
      /* x is on subcomm0 */
      CHKERRQ(VecCreate(subcomm, &x));
      CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
      if (iscuda) {
        CHKERRQ(VecSetType(x, VECCUDA));
      } else {
        CHKERRQ(VecSetType(x, VECSTANDARD));
      }
      CHKERRQ(VecSetUp(x));
      CHKERRQ(VecGetOwnershipRange(x,&low,&high));

      /* Initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
      for (i=low; i<high; i++) {
        PetscScalar val = i;
        CHKERRQ(VecSetValue(x,i,val,INSERT_VALUES));
      }
      CHKERRQ(VecAssemblyBegin(x));
      CHKERRQ(VecAssemblyEnd(x));

      CHKERRMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm));

      /* Tell rank 0 of subcomm1 the global size of x */
      if (!lrank) CHKERRMPI(MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm));

      /* Create an intracomm PETSc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
         But this order actually does not matter, since all we care about is the vector y, which is defined on subcomm1.
      */
      CHKERRMPI(MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm));

      /* Create a vector xg on parentcomm, which shares memory with x */
      CHKERRQ(VecGetLocalSize(x,&n));
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCUDAGetArrayRead(x,&xvalue));
        CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg));
#endif
      } else {
        CHKERRQ(VecGetArrayRead(x,&xvalue));
        CHKERRQ(VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg));
      }

      /* Ranks in subcomm0 have nothing on yg, so they simply have n=0, array=NULL */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
#endif
      } else {
        CHKERRQ(VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
      }

      /* Create the VecScatter, which does an identity map, setting yg[i] = xg[i], i=0..N-1. */
      CHKERRQ(VecGetOwnershipRange(xg,&low,&high)); /* low, high are global indices of xg */
      CHKERRQ(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
      CHKERRQ(ISDuplicate(ix,&iy));
      CHKERRQ(VecScatterCreate(xg,ix,yg,iy,&vscat));

      /* Scatter values from xg to yg */
      CHKERRQ(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
      CHKERRQ(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));

      /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCUDARestoreArrayRead(x,&xvalue));
#endif
      } else {
        CHKERRQ(VecRestoreArrayRead(x,&xvalue));
      }
      CHKERRQ(VecDestroy(&x));
      CHKERRQ(ISDestroy(&ix));
      CHKERRQ(ISDestroy(&iy));
      CHKERRQ(VecDestroy(&xg));
      CHKERRQ(VecDestroy(&yg));
      CHKERRQ(VecScatterDestroy(&vscat));
      CHKERRMPI(MPI_Comm_free(&intercomm));
      CHKERRMPI(MPI_Comm_free(&parentcomm));
    } else if (mycolor == 1) { /* subcomm1 contains ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
      PetscInt    n,N;
      Vec         y,xg,yg;
      IS          ix,iy;
      VecScatter  vscat;
      PetscScalar *yvalue;
      MPI_Comm    intercomm,parentcomm;
      PetscMPIInt lrank;

      CHKERRMPI(MPI_Comm_rank(subcomm,&lrank));
      CHKERRMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm));

      /* The two rank-0 processes of subcomm0 and subcomm1 talk to each other */
      if (!lrank) CHKERRMPI(MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE));
      /* Rank 0 of subcomm1 bcasts N to its members */
      CHKERRMPI(MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm));

      /* Create an intracomm PETSc can work on */
      CHKERRMPI(MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm));

      /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
#endif
      } else {
        CHKERRQ(VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
      }

      CHKERRQ(VecCreate(subcomm, &y));
      CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
      if (iscuda) {
        CHKERRQ(VecSetType(y, VECCUDA));
      } else {
        CHKERRQ(VecSetType(y, VECSTANDARD));
      }
      CHKERRQ(VecSetUp(y));

      CHKERRQ(PetscObjectSetName((PetscObject)y,"y_subcomm_1")); /* Give a name to view y clearly */
      CHKERRQ(VecGetLocalSize(y,&n));
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCUDAGetArray(y,&yvalue));
#endif
      } else {
        CHKERRQ(VecGetArray(y,&yvalue));
      }
      /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
         created in the same order in subcomm0 and subcomm1. For example, we cannot reverse the
         order of creating xg and yg in subcomm1.
      */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
#endif
      } else {
        CHKERRQ(VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
      }

      /* Ranks in subcomm0 already specified the full range of the identity map.
         Ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
      */
      CHKERRQ(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
      CHKERRQ(ISDuplicate(ix,&iy));
      CHKERRQ(VecScatterCreate(xg,ix,yg,iy,&vscat));

      /* Scatter values from xg to yg */
      CHKERRQ(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
      CHKERRQ(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));

      /* After the VecScatter is done, values in yg are available. y is what we are interested in, so we return yvalue to y */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        CHKERRQ(VecCUDARestoreArray(y,&yvalue));
#endif
      } else {
        CHKERRQ(VecRestoreArray(y,&yvalue));
      }

      /* Libraries on subcomm1 can safely use y now, for example, view it */
      CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));

      CHKERRQ(VecDestroy(&y));
      CHKERRQ(ISDestroy(&ix));
      CHKERRQ(ISDestroy(&iy));
      CHKERRQ(VecDestroy(&xg));
      CHKERRQ(VecDestroy(&yg));
      CHKERRQ(VecScatterDestroy(&vscat));
      CHKERRMPI(MPI_Comm_free(&intercomm));
      CHKERRMPI(MPI_Comm_free(&parentcomm));
    } else if (mycolor == 2) { /* subcomm2 */
      /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
    }
  } /* sub2sub */

  /*===========================================================================
   *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
   *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
   *  as we did in case 1, but that is not efficient. Instead, we use one VecScatter
   *  to achieve that.
   *===========================================================================*/
  if (world2subs) {
    Vec         y;
    PetscInt    n,N=15,xstart,ystart,low,high;
    PetscScalar *yvalue;

    /* Initialize x to [0, 1, 2, 3, ..., N-1] */
    CHKERRQ(VecCreate(PETSC_COMM_WORLD, &x));
    CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
    if (iscuda) {
      CHKERRQ(VecSetType(x, VECCUDA));
    } else {
      CHKERRQ(VecSetType(x, VECSTANDARD));
    }
    CHKERRQ(VecSetUp(x));
    CHKERRQ(VecGetOwnershipRange(x,&low,&high));
    for (i=low; i<high; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
    CHKERRQ(VecAssemblyBegin(x));
    CHKERRQ(VecAssemblyEnd(x));

    /* Every subcomm has a y with the same global length as x */
    CHKERRQ(VecCreate(subcomm, &y));
    CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
    if (iscuda) {
      CHKERRQ(VecSetType(y, VECCUDA));
    } else {
      CHKERRQ(VecSetType(y, VECSTANDARD));
    }
    CHKERRQ(VecSetUp(y));
    CHKERRQ(VecGetLocalSize(y,&n));

    /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
       Each rank in the subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
       necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of ranks
       0, 3, 6, etc. from PETSC_COMM_WORLD, so subcomm0's pieces are interleaved with pieces from the other subcomms in yg.
    */
    if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
      CHKERRQ(VecCUDAGetArray(y,&yvalue));
      CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
#endif
    } else {
      CHKERRQ(VecGetArray(y,&yvalue));
      CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
    }
    CHKERRQ(PetscObjectSetName((PetscObject)yg,"yg_on_subcomms")); /* Give a name to view yg clearly */

    /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
       since the first entry of y on this rank comes from x[xstart]. From ystart, we know where to put entries in yg.
    */
    CHKERRQ(VecGetOwnershipRange(y,&xstart,NULL));
    CHKERRQ(VecGetOwnershipRange(yg,&ystart,NULL));

    CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix));
    CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy));
    CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
    CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
    CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));

    /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in the output. */
    CHKERRQ(VecView(yg,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(VecDestroy(&yg));

    /* Restore yvalue so that processes in subcomm can use y from now on. */
    if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
      CHKERRQ(VecCUDARestoreArray(y,&yvalue));
#endif
    } else {
      CHKERRQ(VecRestoreArray(y,&yvalue));
    }
    CHKERRQ(VecScale(y,3.0));

    CHKERRQ(ISDestroy(&ix)); /* One can also destroy ix, iy immediately after VecScatterCreate() */
    CHKERRQ(ISDestroy(&iy));
    CHKERRQ(VecDestroy(&x));
    CHKERRQ(VecDestroy(&y));
    CHKERRQ(VecScatterDestroy(&vscat));
  } /* world2subs */

  CHKERRMPI(MPI_Comm_free(&subcomm));
  CHKERRQ(PetscFinalize());
  return 0;
}

/*TEST

   build:
     requires: !defined(PETSC_HAVE_MPIUNI)

   testset:
     nsize: 7

     test:
       suffix: 1
       args: -world2sub

     test:
       suffix: 2
       args: -sub2sub
       # deadlocks with NECMPI and INTELMPI (20210400300)
       requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)

     test:
       suffix: 3
       args: -world2subs

     test:
       suffix: 4
       args: -world2sub -vectype cuda
       requires: cuda

     test:
       suffix: 5
       args: -sub2sub -vectype cuda
       requires: cuda

     test:
       suffix: 6
       args: -world2subs -vectype cuda
       requires: cuda

     test:
       suffix: 7
       args: -world2sub -sf_type neighbor
       output_file: output/ex9_1.out
       # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc. (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
       # segfaults with NECMPI
       requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)

TEST*/