xref: /petsc/src/vec/is/sf/tests/ex9.c (revision 9566063d113dddea24716c546802770db7481bc0)
197929ea7SJunchao Zhang static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
297929ea7SJunchao Zhang   2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
38a53a0a4SSajid Ali   required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
48a53a0a4SSajid Ali   To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";
597929ea7SJunchao Zhang 
697929ea7SJunchao Zhang #include <petscvec.h>
797929ea7SJunchao Zhang int main(int argc,char **argv)
897929ea7SJunchao Zhang {
997929ea7SJunchao Zhang   PetscMPIInt    nproc,grank,mycolor;
1097929ea7SJunchao Zhang   PetscInt       i,n,N = 20,low,high;
1197929ea7SJunchao Zhang   MPI_Comm       subcomm;
128a53a0a4SSajid Ali   Vec            x  = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
138a53a0a4SSajid Ali   Vec            yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
1497929ea7SJunchao Zhang   VecScatter     vscat;
1597929ea7SJunchao Zhang   IS             ix,iy;
168a53a0a4SSajid Ali   PetscBool      iscuda = PETSC_FALSE;      /* Option to use VECCUDA vectors */
178a53a0a4SSajid Ali   PetscBool      optionflag, compareflag;
188a53a0a4SSajid Ali   char           vectypename[PETSC_MAX_PATH_LEN];
1997929ea7SJunchao Zhang   PetscBool      world2sub  = PETSC_FALSE;  /* Copy a vector from WORLD to a subcomm? */
2097929ea7SJunchao Zhang   PetscBool      sub2sub    = PETSC_FALSE;  /* Copy a vector from a subcomm to another subcomm? */
2197929ea7SJunchao Zhang   PetscBool      world2subs = PETSC_FALSE;  /* Copy a vector from WORLD to multiple subcomms? */
2297929ea7SJunchao Zhang 
23*9566063dSJacob Faibussowitsch   PetscCall(PetscInitialize(&argc,&argv,(char*)0,help));
24*9566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc));
25*9566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&grank));
2697929ea7SJunchao Zhang 
272c71b3e2SJacob Faibussowitsch   PetscCheckFalse(nproc < 2,PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"This test must have at least two processes to run");
2897929ea7SJunchao Zhang 
29*9566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL,0,"-world2sub",&world2sub,NULL));
30*9566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL,0,"-sub2sub",&sub2sub,NULL));
31*9566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL,0,"-world2subs",&world2subs,NULL));
32*9566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag));
338a53a0a4SSajid Ali   if (optionflag) {
34*9566063dSJacob Faibussowitsch     PetscCall(PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag));
358a53a0a4SSajid Ali     if (compareflag) iscuda = PETSC_TRUE;
368a53a0a4SSajid Ali   }
3797929ea7SJunchao Zhang 
3897929ea7SJunchao Zhang   /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
3997929ea7SJunchao Zhang   mycolor = grank % 3;
40*9566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm));
4197929ea7SJunchao Zhang 
4297929ea7SJunchao Zhang   /*===========================================================================
4397929ea7SJunchao Zhang    *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
4497929ea7SJunchao Zhang    *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
4597929ea7SJunchao Zhang    *===========================================================================*/
4697929ea7SJunchao Zhang   if (world2sub) {
47*9566063dSJacob Faibussowitsch     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
48*9566063dSJacob Faibussowitsch     PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
498a53a0a4SSajid Ali     if (iscuda) {
50*9566063dSJacob Faibussowitsch       PetscCall(VecSetType(x, VECCUDA));
518a53a0a4SSajid Ali     } else {
52*9566063dSJacob Faibussowitsch       PetscCall(VecSetType(x, VECSTANDARD));
538a53a0a4SSajid Ali     }
54*9566063dSJacob Faibussowitsch     PetscCall(VecSetUp(x));
55*9566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)x,"x_commworld")); /* Give a name to view x clearly */
5697929ea7SJunchao Zhang 
5797929ea7SJunchao Zhang     /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
58*9566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(x,&low,&high));
5997929ea7SJunchao Zhang     for (i=low; i<high; i++) {
6097929ea7SJunchao Zhang       PetscScalar val = -i;
61*9566063dSJacob Faibussowitsch       PetscCall(VecSetValue(x,i,val,INSERT_VALUES));
6297929ea7SJunchao Zhang     }
63*9566063dSJacob Faibussowitsch     PetscCall(VecAssemblyBegin(x));
64*9566063dSJacob Faibussowitsch     PetscCall(VecAssemblyEnd(x));
6597929ea7SJunchao Zhang 
6697929ea7SJunchao Zhang     /* Transfer x to a vector y only defined on subcomm0 and vice versa */
6797929ea7SJunchao Zhang     if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
6897929ea7SJunchao Zhang       Vec         y;
6997929ea7SJunchao Zhang       PetscScalar *yvalue;
70*9566063dSJacob Faibussowitsch        PetscCall(VecCreate(subcomm, &y));
71*9566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
728a53a0a4SSajid Ali       if (iscuda) {
73*9566063dSJacob Faibussowitsch         PetscCall(VecSetType(y, VECCUDA));
748a53a0a4SSajid Ali       } else {
75*9566063dSJacob Faibussowitsch         PetscCall(VecSetType(y, VECSTANDARD));
768a53a0a4SSajid Ali       }
77*9566063dSJacob Faibussowitsch       PetscCall(VecSetUp(y));
78*9566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)y,"y_subcomm_0")); /* Give a name to view y clearly */
79*9566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(y,&n));
808a53a0a4SSajid Ali       if (iscuda) {
818a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
82*9566063dSJacob Faibussowitsch           PetscCall(VecCUDAGetArray(y,&yvalue));
838a53a0a4SSajid Ali         #endif
848a53a0a4SSajid Ali       } else {
85*9566063dSJacob Faibussowitsch         PetscCall(VecGetArray(y,&yvalue));
868a53a0a4SSajid Ali       }
8797929ea7SJunchao Zhang       /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed by yvalue.
8897929ea7SJunchao Zhang         Note this is a collective call. All processes have to call it and supply consistent N.
8997929ea7SJunchao Zhang       */
908a53a0a4SSajid Ali       if (iscuda) {
918a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
92*9566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
938a53a0a4SSajid Ali         #endif
948a53a0a4SSajid Ali       } else {
95*9566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
968a53a0a4SSajid Ali       }
9797929ea7SJunchao Zhang 
9897929ea7SJunchao Zhang       /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
99*9566063dSJacob Faibussowitsch       PetscCall(VecGetOwnershipRange(yg,&low,&high)); /* low, high are global indices */
100*9566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
101*9566063dSJacob Faibussowitsch       PetscCall(ISDuplicate(ix,&iy));
10297929ea7SJunchao Zhang 
10397929ea7SJunchao Zhang       /* Union of ix's on subcomm0 covers the full range of [0,N) */
104*9566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat));
105*9566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
106*9566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
10797929ea7SJunchao Zhang 
10897929ea7SJunchao Zhang       /* Once yg got the data from x, we return yvalue to y so that we can use y in other operations.
10997929ea7SJunchao Zhang         VecGetArray must be paired with VecRestoreArray.
11097929ea7SJunchao Zhang       */
1118a53a0a4SSajid Ali       if (iscuda) {
1128a53a0a4SSajid Ali          #if defined(PETSC_HAVE_CUDA)
113*9566063dSJacob Faibussowitsch            PetscCall(VecCUDARestoreArray(y,&yvalue));
1148a53a0a4SSajid Ali          #endif
1158a53a0a4SSajid Ali       } else {
116*9566063dSJacob Faibussowitsch         PetscCall(VecRestoreArray(y,&yvalue));
1178a53a0a4SSajid Ali       }
11897929ea7SJunchao Zhang 
11997929ea7SJunchao Zhang       /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
120*9566063dSJacob Faibussowitsch       PetscCall(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
121*9566063dSJacob Faibussowitsch       PetscCall(VecScale(y,2.0));
12297929ea7SJunchao Zhang 
12397929ea7SJunchao Zhang       /* Send the new y back to x */
124*9566063dSJacob Faibussowitsch       PetscCall(VecGetArray(y,&yvalue)); /* If VecScale is done on GPU, Petsc will prepare a valid yvalue for access */
12597929ea7SJunchao Zhang       /* Supply new yvalue to yg without memory copying */
126*9566063dSJacob Faibussowitsch       PetscCall(VecPlaceArray(yg,yvalue));
127*9566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
128*9566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
129*9566063dSJacob Faibussowitsch       PetscCall(VecResetArray(yg));
1308a53a0a4SSajid Ali       if (iscuda) {
1318a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
132*9566063dSJacob Faibussowitsch           PetscCall(VecCUDARestoreArray(y,&yvalue));
1338a53a0a4SSajid Ali         #endif
1348a53a0a4SSajid Ali       } else {
135*9566063dSJacob Faibussowitsch         PetscCall(VecRestoreArray(y,&yvalue));
1368a53a0a4SSajid Ali       }
13797929ea7SJunchao Zhang 
138*9566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&y));
13997929ea7SJunchao Zhang     } else {
14097929ea7SJunchao Zhang       /* Ranks outside of subcomm0 do not supply values to yg */
1418a53a0a4SSajid Ali       if (iscuda) {
1428a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
143*9566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
1448a53a0a4SSajid Ali         #endif
1458a53a0a4SSajid Ali       } else {
146*9566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
1478a53a0a4SSajid Ali       }
14897929ea7SJunchao Zhang 
14997929ea7SJunchao Zhang       /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
15097929ea7SJunchao Zhang         ranks just need to create empty ISes to cheat VecScatterCreate.
15197929ea7SJunchao Zhang       */
152*9566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
153*9566063dSJacob Faibussowitsch       PetscCall(ISDuplicate(ix,&iy));
15497929ea7SJunchao Zhang 
155*9566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat));
156*9566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
157*9566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
15897929ea7SJunchao Zhang 
15997929ea7SJunchao Zhang       /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
16097929ea7SJunchao Zhang         But they have to call VecScatterBegin/End since these routines are collective.
16197929ea7SJunchao Zhang       */
162*9566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
163*9566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
16497929ea7SJunchao Zhang     }
16597929ea7SJunchao Zhang 
166*9566063dSJacob Faibussowitsch     PetscCall(VecView(x,PETSC_VIEWER_STDOUT_WORLD));
167*9566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&ix));
168*9566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iy));
169*9566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&x));
170*9566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&yg));
171*9566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vscat));
17297929ea7SJunchao Zhang   } /* world2sub */
17397929ea7SJunchao Zhang 
17497929ea7SJunchao Zhang   /*===========================================================================
17597929ea7SJunchao Zhang    *  Transfer a vector x defined on subcomm0 to a vector y defined on
17697929ea7SJunchao Zhang    *  subcomm1. The two subcomms are not overlapping and their union is
17797929ea7SJunchao Zhang    *  not necessarily equal to PETSC_COMM_WORLD.
17897929ea7SJunchao Zhang    *===========================================================================*/
17997929ea7SJunchao Zhang   if (sub2sub) {
18097929ea7SJunchao Zhang     if (mycolor == 0) {
18197929ea7SJunchao Zhang       /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
18297929ea7SJunchao Zhang       PetscInt          n,N = 22;
18397929ea7SJunchao Zhang       Vec               x,xg,yg;
18497929ea7SJunchao Zhang       IS                ix,iy;
18597929ea7SJunchao Zhang       VecScatter        vscat;
18697929ea7SJunchao Zhang       const PetscScalar *xvalue;
18797929ea7SJunchao Zhang       MPI_Comm          intercomm,parentcomm;
18897929ea7SJunchao Zhang       PetscMPIInt       lrank;
18997929ea7SJunchao Zhang 
190*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(subcomm,&lrank));
1918a53a0a4SSajid Ali       /* x is on subcomm */
192*9566063dSJacob Faibussowitsch       PetscCall(VecCreate(subcomm, &x));
193*9566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
1948a53a0a4SSajid Ali       if (iscuda) {
195*9566063dSJacob Faibussowitsch         PetscCall(VecSetType(x, VECCUDA));
1968a53a0a4SSajid Ali       } else {
197*9566063dSJacob Faibussowitsch         PetscCall(VecSetType(x, VECSTANDARD));
1988a53a0a4SSajid Ali       }
199*9566063dSJacob Faibussowitsch       PetscCall(VecSetUp(x));
200*9566063dSJacob Faibussowitsch       PetscCall(VecGetOwnershipRange(x,&low,&high));
20197929ea7SJunchao Zhang 
20297929ea7SJunchao Zhang       /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
20397929ea7SJunchao Zhang       for (i=low; i<high; i++) {
20497929ea7SJunchao Zhang         PetscScalar val = i;
205*9566063dSJacob Faibussowitsch         PetscCall(VecSetValue(x,i,val,INSERT_VALUES));
20697929ea7SJunchao Zhang       }
207*9566063dSJacob Faibussowitsch       PetscCall(VecAssemblyBegin(x));
208*9566063dSJacob Faibussowitsch       PetscCall(VecAssemblyEnd(x));
20997929ea7SJunchao Zhang 
210*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm));
21197929ea7SJunchao Zhang 
21297929ea7SJunchao Zhang       /* Tell rank 0 of subcomm1 the global size of x */
213*9566063dSJacob Faibussowitsch       if (!lrank) PetscCallMPI(MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm));
21497929ea7SJunchao Zhang 
21597929ea7SJunchao Zhang       /* Create an intracomm Petsc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
21697929ea7SJunchao Zhang         But this order actually does not matter, since what we care is vector y, which is defined on subcomm1.
21797929ea7SJunchao Zhang       */
218*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm));
21997929ea7SJunchao Zhang 
22097929ea7SJunchao Zhang       /* Create a vector xg on parentcomm, which shares memory with x */
221*9566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(x,&n));
2228a53a0a4SSajid Ali       if (iscuda) {
2238a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
224*9566063dSJacob Faibussowitsch           PetscCall(VecCUDAGetArrayRead(x,&xvalue));
225*9566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg));
2268a53a0a4SSajid Ali         #endif
2278a53a0a4SSajid Ali       } else {
228*9566063dSJacob Faibussowitsch         PetscCall(VecGetArrayRead(x,&xvalue));
229*9566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg));
2308a53a0a4SSajid Ali       }
23197929ea7SJunchao Zhang 
23297929ea7SJunchao Zhang       /* Ranks in subcomm 0 have nothing on yg, so they simply have n=0, array=NULL */
2338a53a0a4SSajid Ali       if (iscuda) {
2348a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
235*9566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
2368a53a0a4SSajid Ali         #endif
2378a53a0a4SSajid Ali       } else {
238*9566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
2398a53a0a4SSajid Ali       }
24097929ea7SJunchao Zhang 
24197929ea7SJunchao Zhang       /* Create the vecscatter, which does identity map by setting yg[i] = xg[i], i=0..N-1. */
242*9566063dSJacob Faibussowitsch       PetscCall(VecGetOwnershipRange(xg,&low,&high)); /* low, high are global indices of xg */
243*9566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
244*9566063dSJacob Faibussowitsch       PetscCall(ISDuplicate(ix,&iy));
245*9566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(xg,ix,yg,iy,&vscat));
24697929ea7SJunchao Zhang 
24797929ea7SJunchao Zhang       /* Scatter values from xg to yg */
248*9566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
249*9566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
25097929ea7SJunchao Zhang 
25197929ea7SJunchao Zhang       /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
2528a53a0a4SSajid Ali       if (iscuda) {
2538a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
254*9566063dSJacob Faibussowitsch           PetscCall(VecCUDARestoreArrayRead(x,&xvalue));
2558a53a0a4SSajid Ali         #endif
2568a53a0a4SSajid Ali       } else {
257*9566063dSJacob Faibussowitsch         PetscCall(VecRestoreArrayRead(x,&xvalue));
2588a53a0a4SSajid Ali       }
259*9566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&x));
260*9566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&ix));
261*9566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iy));
262*9566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&xg));
263*9566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&yg));
264*9566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vscat));
265*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&intercomm));
266*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&parentcomm));
26797929ea7SJunchao Zhang     } else if (mycolor == 1) { /* subcomm 1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
26897929ea7SJunchao Zhang       PetscInt    n,N;
26997929ea7SJunchao Zhang       Vec         y,xg,yg;
27097929ea7SJunchao Zhang       IS          ix,iy;
27197929ea7SJunchao Zhang       VecScatter  vscat;
27297929ea7SJunchao Zhang       PetscScalar *yvalue;
27397929ea7SJunchao Zhang       MPI_Comm    intercomm,parentcomm;
27497929ea7SJunchao Zhang       PetscMPIInt lrank;
27597929ea7SJunchao Zhang 
276*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(subcomm,&lrank));
277*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm));
27897929ea7SJunchao Zhang 
27997929ea7SJunchao Zhang       /* Two rank-0 are talking */
280*9566063dSJacob Faibussowitsch       if (!lrank) PetscCallMPI(MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE));
28197929ea7SJunchao Zhang       /* Rank 0 of subcomm1 bcasts N to its members */
282*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm));
28397929ea7SJunchao Zhang 
28497929ea7SJunchao Zhang       /* Create a intracomm Petsc can work on */
285*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm));
28697929ea7SJunchao Zhang 
28797929ea7SJunchao Zhang       /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/
2888a53a0a4SSajid Ali       if (iscuda) {
2898a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
290*9566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
2918a53a0a4SSajid Ali         #endif
2928a53a0a4SSajid Ali       } else {
293*9566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
2948a53a0a4SSajid Ali       }
29597929ea7SJunchao Zhang 
296*9566063dSJacob Faibussowitsch       PetscCall(VecCreate(subcomm, &y));
297*9566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
2988a53a0a4SSajid Ali       if (iscuda) {
299*9566063dSJacob Faibussowitsch         PetscCall(VecSetType(y, VECCUDA));
3008a53a0a4SSajid Ali       } else {
301*9566063dSJacob Faibussowitsch         PetscCall(VecSetType(y, VECSTANDARD));
3028a53a0a4SSajid Ali       }
303*9566063dSJacob Faibussowitsch       PetscCall(VecSetUp(y));
3048a53a0a4SSajid Ali 
305*9566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)y,"y_subcomm_1")); /* Give a name to view y clearly */
306*9566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(y,&n));
3078a53a0a4SSajid Ali       if (iscuda) {
3088a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
309*9566063dSJacob Faibussowitsch           PetscCall(VecCUDAGetArray(y,&yvalue));
3108a53a0a4SSajid Ali         #endif
3118a53a0a4SSajid Ali       } else {
312*9566063dSJacob Faibussowitsch         PetscCall(VecGetArray(y,&yvalue));
3138a53a0a4SSajid Ali       }
31497929ea7SJunchao Zhang       /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
31597929ea7SJunchao Zhang         created in the same order in subcomm0/1. For example, we can not reverse the order of
31697929ea7SJunchao Zhang         creating xg and yg in subcomm1.
31797929ea7SJunchao Zhang       */
3188a53a0a4SSajid Ali       if (iscuda) {
3198a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
320*9566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
3218a53a0a4SSajid Ali         #endif
3228a53a0a4SSajid Ali       } else {
323*9566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
3248a53a0a4SSajid Ali       }
32597929ea7SJunchao Zhang 
32697929ea7SJunchao Zhang       /* Ranks in subcomm0 already specified the full range of the identity map.
32797929ea7SJunchao Zhang         ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
32897929ea7SJunchao Zhang       */
329*9566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
330*9566063dSJacob Faibussowitsch       PetscCall(ISDuplicate(ix,&iy));
331*9566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(xg,ix,yg,iy,&vscat));
33297929ea7SJunchao Zhang 
33397929ea7SJunchao Zhang       /* Scatter values from xg to yg */
334*9566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
335*9566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
33697929ea7SJunchao Zhang 
33797929ea7SJunchao Zhang       /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */
3388a53a0a4SSajid Ali       if (iscuda) {
3398a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
340*9566063dSJacob Faibussowitsch           PetscCall(VecCUDARestoreArray(y,&yvalue));
3418a53a0a4SSajid Ali         #endif
3428a53a0a4SSajid Ali       } else {
343*9566063dSJacob Faibussowitsch         PetscCall(VecRestoreArray(y,&yvalue));
3448a53a0a4SSajid Ali       }
34597929ea7SJunchao Zhang 
34697929ea7SJunchao Zhang       /* Libraries on subcomm1 can safely use y now, for example, view it */
347*9566063dSJacob Faibussowitsch       PetscCall(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
34897929ea7SJunchao Zhang 
349*9566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&y));
350*9566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&ix));
351*9566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iy));
352*9566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&xg));
353*9566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&yg));
354*9566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vscat));
355*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&intercomm));
356*9566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&parentcomm));
35797929ea7SJunchao Zhang     } else if (mycolor == 2) { /* subcomm2 */
35897929ea7SJunchao Zhang       /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
35997929ea7SJunchao Zhang     }
36097929ea7SJunchao Zhang   } /* sub2sub */
36197929ea7SJunchao Zhang 
36297929ea7SJunchao Zhang   /*===========================================================================
36397929ea7SJunchao Zhang    *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
36497929ea7SJunchao Zhang    *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
36597929ea7SJunchao Zhang    *  as we did in case 1, but that is not efficient. Instead, we use one vecscatter
36697929ea7SJunchao Zhang    *  to achieve that.
36797929ea7SJunchao Zhang    *===========================================================================*/
36897929ea7SJunchao Zhang   if (world2subs) {
3698a53a0a4SSajid Ali     Vec         y;
37097929ea7SJunchao Zhang     PetscInt    n,N=15,xstart,ystart,low,high;
37197929ea7SJunchao Zhang     PetscScalar *yvalue;
37297929ea7SJunchao Zhang 
37397929ea7SJunchao Zhang     /* Initialize x to [0, 1, 2, 3, ..., N-1] */
374*9566063dSJacob Faibussowitsch     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
375*9566063dSJacob Faibussowitsch     PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
3768a53a0a4SSajid Ali     if (iscuda) {
377*9566063dSJacob Faibussowitsch       PetscCall(VecSetType(x, VECCUDA));
3788a53a0a4SSajid Ali     } else {
379*9566063dSJacob Faibussowitsch       PetscCall(VecSetType(x, VECSTANDARD));
3808a53a0a4SSajid Ali     }
381*9566063dSJacob Faibussowitsch     PetscCall(VecSetUp(x));
382*9566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(x,&low,&high));
383*9566063dSJacob Faibussowitsch     for (i=low; i<high; i++) PetscCall(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
384*9566063dSJacob Faibussowitsch     PetscCall(VecAssemblyBegin(x));
385*9566063dSJacob Faibussowitsch     PetscCall(VecAssemblyEnd(x));
38697929ea7SJunchao Zhang 
38797929ea7SJunchao Zhang     /* Every subcomm has a y as long as x */
388*9566063dSJacob Faibussowitsch     PetscCall(VecCreate(subcomm, &y));
389*9566063dSJacob Faibussowitsch     PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
3908a53a0a4SSajid Ali     if (iscuda) {
391*9566063dSJacob Faibussowitsch       PetscCall(VecSetType(y, VECCUDA));
3928a53a0a4SSajid Ali     } else {
393*9566063dSJacob Faibussowitsch       PetscCall(VecSetType(y, VECSTANDARD));
3948a53a0a4SSajid Ali     }
395*9566063dSJacob Faibussowitsch     PetscCall(VecSetUp(y));
396*9566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(y,&n));
39797929ea7SJunchao Zhang 
39897929ea7SJunchao Zhang     /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
39997929ea7SJunchao Zhang        Eeach rank in subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
40097929ea7SJunchao Zhang        necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of rank
40197929ea7SJunchao Zhang        0, 3, 6 etc from PETSC_COMM_WORLD. So subcomm0's pieces are interleaved with pieces from other subcomms in yg.
40297929ea7SJunchao Zhang     */
4038a53a0a4SSajid Ali     if (iscuda) {
4048a53a0a4SSajid Ali       #if defined(PETSC_HAVE_CUDA)
405*9566063dSJacob Faibussowitsch         PetscCall(VecCUDAGetArray(y,&yvalue));
406*9566063dSJacob Faibussowitsch         PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
4078a53a0a4SSajid Ali       #endif
4088a53a0a4SSajid Ali     } else {
409*9566063dSJacob Faibussowitsch       PetscCall(VecGetArray(y,&yvalue));
410*9566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
4118a53a0a4SSajid Ali     }
412*9566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)yg,"yg_on_subcomms")); /* Give a name to view yg clearly */
41397929ea7SJunchao Zhang 
41497929ea7SJunchao Zhang     /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
41597929ea7SJunchao Zhang        since first entry of y on this rank is from x[xstart]. From ystart, we know where ot put entries to yg.
41697929ea7SJunchao Zhang      */
417*9566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(y,&xstart,NULL));
418*9566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(yg,&ystart,NULL));
41997929ea7SJunchao Zhang 
420*9566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix));
421*9566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy));
422*9566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat));
423*9566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
424*9566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
42597929ea7SJunchao Zhang 
42697929ea7SJunchao Zhang     /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in output. */
427*9566063dSJacob Faibussowitsch     PetscCall(VecView(yg,PETSC_VIEWER_STDOUT_WORLD));
428*9566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&yg));
42997929ea7SJunchao Zhang 
43097929ea7SJunchao Zhang     /* Restory yvalue so that processes in subcomm can use y from now on. */
4318a53a0a4SSajid Ali     if (iscuda) {
4328a53a0a4SSajid Ali       #if defined(PETSC_HAVE_CUDA)
433*9566063dSJacob Faibussowitsch         PetscCall(VecCUDARestoreArray(y,&yvalue));
4348a53a0a4SSajid Ali       #endif
4358a53a0a4SSajid Ali     } else {
436*9566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(y,&yvalue));
4378a53a0a4SSajid Ali     }
438*9566063dSJacob Faibussowitsch     PetscCall(VecScale(y,3.0));
43997929ea7SJunchao Zhang 
440*9566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&ix)); /* One can also destroy ix, iy immediately after VecScatterCreate() */
441*9566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iy));
442*9566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&x));
443*9566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&y));
444*9566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vscat));
44597929ea7SJunchao Zhang   } /* world2subs */
44697929ea7SJunchao Zhang 
447*9566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&subcomm));
448*9566063dSJacob Faibussowitsch   PetscCall(PetscFinalize());
449b122ec5aSJacob Faibussowitsch   return 0;
45097929ea7SJunchao Zhang }
45197929ea7SJunchao Zhang 
45297929ea7SJunchao Zhang /*TEST
45397929ea7SJunchao Zhang 
45497929ea7SJunchao Zhang    build:
455dfd57a17SPierre Jolivet      requires: !defined(PETSC_HAVE_MPIUNI)
456c6c723bbSStefano Zampini 
45797929ea7SJunchao Zhang    testset:
45897929ea7SJunchao Zhang      nsize: 7
45997929ea7SJunchao Zhang 
46097929ea7SJunchao Zhang      test:
46197929ea7SJunchao Zhang        suffix: 1
46297929ea7SJunchao Zhang        args: -world2sub
46397929ea7SJunchao Zhang 
46497929ea7SJunchao Zhang      test:
46597929ea7SJunchao Zhang        suffix: 2
46697929ea7SJunchao Zhang        args: -sub2sub
467f424265bSStefano Zampini        # deadlocks with NECMPI and INTELMPI (20210400300)
468f424265bSStefano Zampini        requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)
46997929ea7SJunchao Zhang 
47097929ea7SJunchao Zhang      test:
47197929ea7SJunchao Zhang        suffix: 3
47297929ea7SJunchao Zhang        args: -world2subs
47397929ea7SJunchao Zhang 
47497929ea7SJunchao Zhang      test:
47597929ea7SJunchao Zhang        suffix: 4
4768a53a0a4SSajid Ali        args: -world2sub -vectype cuda
4778a53a0a4SSajid Ali        requires: cuda
4788a53a0a4SSajid Ali 
4798a53a0a4SSajid Ali      test:
4808a53a0a4SSajid Ali        suffix: 5
4818a53a0a4SSajid Ali        args: -sub2sub -vectype cuda
4828a53a0a4SSajid Ali        requires: cuda
4838a53a0a4SSajid Ali 
4848a53a0a4SSajid Ali      test:
4858a53a0a4SSajid Ali       suffix: 6
4868a53a0a4SSajid Ali       args: -world2subs -vectype cuda
4878a53a0a4SSajid Ali       requires: cuda
4888a53a0a4SSajid Ali 
4898a53a0a4SSajid Ali      test:
4908a53a0a4SSajid Ali        suffix: 7
49197929ea7SJunchao Zhang        args: -world2sub -sf_type neighbor
49297929ea7SJunchao Zhang        output_file: output/ex9_1.out
493c6c723bbSStefano Zampini        # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !define(PETSC_HAVE_OMPI_MAJOR_VERSION)
494c6c723bbSStefano Zampini        # segfaults with NECMPI
495c6c723bbSStefano Zampini        requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)
49697929ea7SJunchao Zhang TEST*/
497