xref: /petsc/src/vec/is/sf/tests/ex9.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
197929ea7SJunchao Zhang static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
297929ea7SJunchao Zhang   2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
38a53a0a4SSajid Ali   required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
48a53a0a4SSajid Ali   To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";
597929ea7SJunchao Zhang 
697929ea7SJunchao Zhang #include <petscvec.h>
797929ea7SJunchao Zhang int main(int argc,char **argv)
897929ea7SJunchao Zhang {
997929ea7SJunchao Zhang   PetscErrorCode ierr;
1097929ea7SJunchao Zhang   PetscMPIInt    nproc,grank,mycolor;
1197929ea7SJunchao Zhang   PetscInt       i,n,N = 20,low,high;
1297929ea7SJunchao Zhang   MPI_Comm       subcomm;
138a53a0a4SSajid Ali   Vec            x  = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
148a53a0a4SSajid Ali   Vec            yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
1597929ea7SJunchao Zhang   VecScatter     vscat;
1697929ea7SJunchao Zhang   IS             ix,iy;
178a53a0a4SSajid Ali   PetscBool      iscuda = PETSC_FALSE;      /* Option to use VECCUDA vectors */
188a53a0a4SSajid Ali   PetscBool      optionflag, compareflag;
198a53a0a4SSajid Ali   char           vectypename[PETSC_MAX_PATH_LEN];
2097929ea7SJunchao Zhang   PetscBool      world2sub  = PETSC_FALSE;  /* Copy a vector from WORLD to a subcomm? */
2197929ea7SJunchao Zhang   PetscBool      sub2sub    = PETSC_FALSE;  /* Copy a vector from a subcomm to another subcomm? */
2297929ea7SJunchao Zhang   PetscBool      world2subs = PETSC_FALSE;  /* Copy a vector from WORLD to multiple subcomms? */
2397929ea7SJunchao Zhang 
2497929ea7SJunchao Zhang   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
25*5f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc));
26*5f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&grank));
2797929ea7SJunchao Zhang 
282c71b3e2SJacob Faibussowitsch   PetscCheckFalse(nproc < 2,PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"This test must have at least two processes to run");
2997929ea7SJunchao Zhang 
30*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetBool(NULL,0,"-world2sub",&world2sub,NULL));
31*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetBool(NULL,0,"-sub2sub",&sub2sub,NULL));
32*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetBool(NULL,0,"-world2subs",&world2subs,NULL));
33*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag));
348a53a0a4SSajid Ali   if (optionflag) {
35*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag));
368a53a0a4SSajid Ali     if (compareflag) iscuda = PETSC_TRUE;
378a53a0a4SSajid Ali   }
3897929ea7SJunchao Zhang 
3997929ea7SJunchao Zhang   /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
4097929ea7SJunchao Zhang   mycolor = grank % 3;
41*5f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm));
4297929ea7SJunchao Zhang 
4397929ea7SJunchao Zhang   /*===========================================================================
4497929ea7SJunchao Zhang    *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
4597929ea7SJunchao Zhang    *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
4697929ea7SJunchao Zhang    *===========================================================================*/
4797929ea7SJunchao Zhang   if (world2sub) {
48*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCreate(PETSC_COMM_WORLD, &x));
49*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
508a53a0a4SSajid Ali     if (iscuda) {
51*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(x, VECCUDA));
528a53a0a4SSajid Ali     } else {
53*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(x, VECSTANDARD));
548a53a0a4SSajid Ali     }
55*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetUp(x));
56*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectSetName((PetscObject)x,"x_commworld")); /* Give a name to view x clearly */
5797929ea7SJunchao Zhang 
5897929ea7SJunchao Zhang     /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
59*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(x,&low,&high));
6097929ea7SJunchao Zhang     for (i=low; i<high; i++) {
6197929ea7SJunchao Zhang       PetscScalar val = -i;
62*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetValue(x,i,val,INSERT_VALUES));
6397929ea7SJunchao Zhang     }
64*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecAssemblyBegin(x));
65*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecAssemblyEnd(x));
6697929ea7SJunchao Zhang 
6797929ea7SJunchao Zhang     /* Transfer x to a vector y only defined on subcomm0 and vice versa */
6897929ea7SJunchao Zhang     if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
6997929ea7SJunchao Zhang       Vec         y;
7097929ea7SJunchao Zhang       PetscScalar *yvalue;
71*5f80ce2aSJacob Faibussowitsch        CHKERRQ(VecCreate(subcomm, &y));
72*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
738a53a0a4SSajid Ali       if (iscuda) {
74*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(y, VECCUDA));
758a53a0a4SSajid Ali       } else {
76*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(y, VECSTANDARD));
778a53a0a4SSajid Ali       }
78*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetUp(y));
79*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)y,"y_subcomm_0")); /* Give a name to view y clearly */
80*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetLocalSize(y,&n));
818a53a0a4SSajid Ali       if (iscuda) {
828a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
83*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDAGetArray(y,&yvalue));
848a53a0a4SSajid Ali         #endif
858a53a0a4SSajid Ali       } else {
86*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArray(y,&yvalue));
878a53a0a4SSajid Ali       }
8897929ea7SJunchao Zhang       /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed by yvalue.
8997929ea7SJunchao Zhang         Note this is a collective call. All processes have to call it and supply consistent N.
9097929ea7SJunchao Zhang       */
918a53a0a4SSajid Ali       if (iscuda) {
928a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
93*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
948a53a0a4SSajid Ali         #endif
958a53a0a4SSajid Ali       } else {
96*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
978a53a0a4SSajid Ali       }
9897929ea7SJunchao Zhang 
9997929ea7SJunchao Zhang       /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
100*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetOwnershipRange(yg,&low,&high)); /* low, high are global indices */
101*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
102*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(ix,&iy));
10397929ea7SJunchao Zhang 
10497929ea7SJunchao Zhang       /* Union of ix's on subcomm0 covers the full range of [0,N) */
105*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
106*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
107*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
10897929ea7SJunchao Zhang 
10997929ea7SJunchao Zhang       /* Once yg got the data from x, we return yvalue to y so that we can use y in other operations.
11097929ea7SJunchao Zhang         VecGetArray must be paired with VecRestoreArray.
11197929ea7SJunchao Zhang       */
1128a53a0a4SSajid Ali       if (iscuda) {
1138a53a0a4SSajid Ali          #if defined(PETSC_HAVE_CUDA)
114*5f80ce2aSJacob Faibussowitsch            CHKERRQ(VecCUDARestoreArray(y,&yvalue));
1158a53a0a4SSajid Ali          #endif
1168a53a0a4SSajid Ali       } else {
117*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(y,&yvalue));
1188a53a0a4SSajid Ali       }
11997929ea7SJunchao Zhang 
12097929ea7SJunchao Zhang       /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
121*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
122*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScale(y,2.0));
12397929ea7SJunchao Zhang 
12497929ea7SJunchao Zhang       /* Send the new y back to x */
125*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetArray(y,&yvalue)); /* If VecScale is done on GPU, Petsc will prepare a valid yvalue for access */
12697929ea7SJunchao Zhang       /* Supply new yvalue to yg without memory copying */
127*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecPlaceArray(yg,yvalue));
128*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
129*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
130*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecResetArray(yg));
1318a53a0a4SSajid Ali       if (iscuda) {
1328a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
133*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDARestoreArray(y,&yvalue));
1348a53a0a4SSajid Ali         #endif
1358a53a0a4SSajid Ali       } else {
136*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(y,&yvalue));
1378a53a0a4SSajid Ali       }
13897929ea7SJunchao Zhang 
139*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&y));
14097929ea7SJunchao Zhang     } else {
14197929ea7SJunchao Zhang       /* Ranks outside of subcomm0 do not supply values to yg */
1428a53a0a4SSajid Ali       if (iscuda) {
1438a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
144*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
1458a53a0a4SSajid Ali         #endif
1468a53a0a4SSajid Ali       } else {
147*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
1488a53a0a4SSajid Ali       }
14997929ea7SJunchao Zhang 
15097929ea7SJunchao Zhang       /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
15197929ea7SJunchao Zhang         ranks just need to create empty ISes to cheat VecScatterCreate.
15297929ea7SJunchao Zhang       */
153*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
154*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(ix,&iy));
15597929ea7SJunchao Zhang 
156*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
157*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
158*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
15997929ea7SJunchao Zhang 
16097929ea7SJunchao Zhang       /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
16197929ea7SJunchao Zhang         But they have to call VecScatterBegin/End since these routines are collective.
16297929ea7SJunchao Zhang       */
163*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
164*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
16597929ea7SJunchao Zhang     }
16697929ea7SJunchao Zhang 
167*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecView(x,PETSC_VIEWER_STDOUT_WORLD));
168*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&ix));
169*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&iy));
170*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&x));
171*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&yg));
172*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterDestroy(&vscat));
17397929ea7SJunchao Zhang   } /* world2sub */
17497929ea7SJunchao Zhang 
17597929ea7SJunchao Zhang   /*===========================================================================
17697929ea7SJunchao Zhang    *  Transfer a vector x defined on subcomm0 to a vector y defined on
17797929ea7SJunchao Zhang    *  subcomm1. The two subcomms are not overlapping and their union is
17897929ea7SJunchao Zhang    *  not necessarily equal to PETSC_COMM_WORLD.
17997929ea7SJunchao Zhang    *===========================================================================*/
18097929ea7SJunchao Zhang   if (sub2sub) {
18197929ea7SJunchao Zhang     if (mycolor == 0) {
18297929ea7SJunchao Zhang       /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
18397929ea7SJunchao Zhang       PetscInt          n,N = 22;
18497929ea7SJunchao Zhang       Vec               x,xg,yg;
18597929ea7SJunchao Zhang       IS                ix,iy;
18697929ea7SJunchao Zhang       VecScatter        vscat;
18797929ea7SJunchao Zhang       const PetscScalar *xvalue;
18897929ea7SJunchao Zhang       MPI_Comm          intercomm,parentcomm;
18997929ea7SJunchao Zhang       PetscMPIInt       lrank;
19097929ea7SJunchao Zhang 
191*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_rank(subcomm,&lrank));
1928a53a0a4SSajid Ali       /* x is on subcomm */
193*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecCreate(subcomm, &x));
194*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
1958a53a0a4SSajid Ali       if (iscuda) {
196*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(x, VECCUDA));
1978a53a0a4SSajid Ali       } else {
198*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(x, VECSTANDARD));
1998a53a0a4SSajid Ali       }
200*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetUp(x));
201*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetOwnershipRange(x,&low,&high));
20297929ea7SJunchao Zhang 
20397929ea7SJunchao Zhang       /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
20497929ea7SJunchao Zhang       for (i=low; i<high; i++) {
20597929ea7SJunchao Zhang         PetscScalar val = i;
206*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetValue(x,i,val,INSERT_VALUES));
20797929ea7SJunchao Zhang       }
208*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecAssemblyBegin(x));
209*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecAssemblyEnd(x));
21097929ea7SJunchao Zhang 
211*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm));
21297929ea7SJunchao Zhang 
21397929ea7SJunchao Zhang       /* Tell rank 0 of subcomm1 the global size of x */
214*5f80ce2aSJacob Faibussowitsch       if (!lrank) CHKERRMPI(MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm));
21597929ea7SJunchao Zhang 
21697929ea7SJunchao Zhang       /* Create an intracomm Petsc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
21797929ea7SJunchao Zhang         But this order actually does not matter, since what we care is vector y, which is defined on subcomm1.
21897929ea7SJunchao Zhang       */
219*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm));
22097929ea7SJunchao Zhang 
22197929ea7SJunchao Zhang       /* Create a vector xg on parentcomm, which shares memory with x */
222*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetLocalSize(x,&n));
2238a53a0a4SSajid Ali       if (iscuda) {
2248a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
225*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDAGetArrayRead(x,&xvalue));
226*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg));
2278a53a0a4SSajid Ali         #endif
2288a53a0a4SSajid Ali       } else {
229*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArrayRead(x,&xvalue));
230*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg));
2318a53a0a4SSajid Ali       }
23297929ea7SJunchao Zhang 
23397929ea7SJunchao Zhang       /* Ranks in subcomm 0 have nothing on yg, so they simply have n=0, array=NULL */
2348a53a0a4SSajid Ali       if (iscuda) {
2358a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
236*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
2378a53a0a4SSajid Ali         #endif
2388a53a0a4SSajid Ali       } else {
239*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
2408a53a0a4SSajid Ali       }
24197929ea7SJunchao Zhang 
24297929ea7SJunchao Zhang       /* Create the vecscatter, which does identity map by setting yg[i] = xg[i], i=0..N-1. */
243*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetOwnershipRange(xg,&low,&high)); /* low, high are global indices of xg */
244*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
245*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(ix,&iy));
246*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterCreate(xg,ix,yg,iy,&vscat));
24797929ea7SJunchao Zhang 
24897929ea7SJunchao Zhang       /* Scatter values from xg to yg */
249*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
250*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
25197929ea7SJunchao Zhang 
25297929ea7SJunchao Zhang       /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
2538a53a0a4SSajid Ali       if (iscuda) {
2548a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
255*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDARestoreArrayRead(x,&xvalue));
2568a53a0a4SSajid Ali         #endif
2578a53a0a4SSajid Ali       } else {
258*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArrayRead(x,&xvalue));
2598a53a0a4SSajid Ali       }
260*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&x));
261*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&ix));
262*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&iy));
263*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&xg));
264*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&yg));
265*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterDestroy(&vscat));
266*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_free(&intercomm));
267*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_free(&parentcomm));
26897929ea7SJunchao Zhang     } else if (mycolor == 1) { /* subcomm 1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
26997929ea7SJunchao Zhang       PetscInt    n,N;
27097929ea7SJunchao Zhang       Vec         y,xg,yg;
27197929ea7SJunchao Zhang       IS          ix,iy;
27297929ea7SJunchao Zhang       VecScatter  vscat;
27397929ea7SJunchao Zhang       PetscScalar *yvalue;
27497929ea7SJunchao Zhang       MPI_Comm    intercomm,parentcomm;
27597929ea7SJunchao Zhang       PetscMPIInt lrank;
27697929ea7SJunchao Zhang 
277*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_rank(subcomm,&lrank));
278*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm));
27997929ea7SJunchao Zhang 
28097929ea7SJunchao Zhang       /* Two rank-0 are talking */
281*5f80ce2aSJacob Faibussowitsch       if (!lrank) CHKERRMPI(MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE));
28297929ea7SJunchao Zhang       /* Rank 0 of subcomm1 bcasts N to its members */
283*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm));
28497929ea7SJunchao Zhang 
28597929ea7SJunchao Zhang       /* Create a intracomm Petsc can work on */
286*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm));
28797929ea7SJunchao Zhang 
28897929ea7SJunchao Zhang       /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/
2898a53a0a4SSajid Ali       if (iscuda) {
2908a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
291*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
2928a53a0a4SSajid Ali         #endif
2938a53a0a4SSajid Ali       } else {
294*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
2958a53a0a4SSajid Ali       }
29697929ea7SJunchao Zhang 
297*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecCreate(subcomm, &y));
298*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
2998a53a0a4SSajid Ali       if (iscuda) {
300*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(y, VECCUDA));
3018a53a0a4SSajid Ali       } else {
302*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(y, VECSTANDARD));
3038a53a0a4SSajid Ali       }
304*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetUp(y));
3058a53a0a4SSajid Ali 
306*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)y,"y_subcomm_1")); /* Give a name to view y clearly */
307*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetLocalSize(y,&n));
3088a53a0a4SSajid Ali       if (iscuda) {
3098a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
310*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDAGetArray(y,&yvalue));
3118a53a0a4SSajid Ali         #endif
3128a53a0a4SSajid Ali       } else {
313*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArray(y,&yvalue));
3148a53a0a4SSajid Ali       }
31597929ea7SJunchao Zhang       /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
31697929ea7SJunchao Zhang         created in the same order in subcomm0/1. For example, we can not reverse the order of
31797929ea7SJunchao Zhang         creating xg and yg in subcomm1.
31897929ea7SJunchao Zhang       */
3198a53a0a4SSajid Ali       if (iscuda) {
3208a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
321*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
3228a53a0a4SSajid Ali         #endif
3238a53a0a4SSajid Ali       } else {
324*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
3258a53a0a4SSajid Ali       }
32697929ea7SJunchao Zhang 
32797929ea7SJunchao Zhang       /* Ranks in subcomm0 already specified the full range of the identity map.
32897929ea7SJunchao Zhang         ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
32997929ea7SJunchao Zhang       */
330*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
331*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(ix,&iy));
332*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterCreate(xg,ix,yg,iy,&vscat));
33397929ea7SJunchao Zhang 
33497929ea7SJunchao Zhang       /* Scatter values from xg to yg */
335*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
336*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
33797929ea7SJunchao Zhang 
33897929ea7SJunchao Zhang       /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */
3398a53a0a4SSajid Ali       if (iscuda) {
3408a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
341*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDARestoreArray(y,&yvalue));
3428a53a0a4SSajid Ali         #endif
3438a53a0a4SSajid Ali       } else {
344*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(y,&yvalue));
3458a53a0a4SSajid Ali       }
34697929ea7SJunchao Zhang 
34797929ea7SJunchao Zhang       /* Libraries on subcomm1 can safely use y now, for example, view it */
348*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
34997929ea7SJunchao Zhang 
350*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&y));
351*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&ix));
352*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&iy));
353*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&xg));
354*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&yg));
355*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterDestroy(&vscat));
356*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_free(&intercomm));
357*5f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_free(&parentcomm));
35897929ea7SJunchao Zhang     } else if (mycolor == 2) { /* subcomm2 */
35997929ea7SJunchao Zhang       /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
36097929ea7SJunchao Zhang     }
36197929ea7SJunchao Zhang   } /* sub2sub */
36297929ea7SJunchao Zhang 
36397929ea7SJunchao Zhang   /*===========================================================================
36497929ea7SJunchao Zhang    *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
36597929ea7SJunchao Zhang    *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
36697929ea7SJunchao Zhang    *  as we did in case 1, but that is not efficient. Instead, we use one vecscatter
36797929ea7SJunchao Zhang    *  to achieve that.
36897929ea7SJunchao Zhang    *===========================================================================*/
36997929ea7SJunchao Zhang   if (world2subs) {
3708a53a0a4SSajid Ali     Vec         y;
37197929ea7SJunchao Zhang     PetscInt    n,N=15,xstart,ystart,low,high;
37297929ea7SJunchao Zhang     PetscScalar *yvalue;
37397929ea7SJunchao Zhang 
37497929ea7SJunchao Zhang     /* Initialize x to [0, 1, 2, 3, ..., N-1] */
375*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCreate(PETSC_COMM_WORLD, &x));
376*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
3778a53a0a4SSajid Ali     if (iscuda) {
378*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(x, VECCUDA));
3798a53a0a4SSajid Ali     } else {
380*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(x, VECSTANDARD));
3818a53a0a4SSajid Ali     }
382*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetUp(x));
383*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(x,&low,&high));
384*5f80ce2aSJacob Faibussowitsch     for (i=low; i<high; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
385*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecAssemblyBegin(x));
386*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecAssemblyEnd(x));
38797929ea7SJunchao Zhang 
38897929ea7SJunchao Zhang     /* Every subcomm has a y as long as x */
389*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCreate(subcomm, &y));
390*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
3918a53a0a4SSajid Ali     if (iscuda) {
392*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(y, VECCUDA));
3938a53a0a4SSajid Ali     } else {
394*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(y, VECSTANDARD));
3958a53a0a4SSajid Ali     }
396*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetUp(y));
397*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetLocalSize(y,&n));
39897929ea7SJunchao Zhang 
39997929ea7SJunchao Zhang     /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
40097929ea7SJunchao Zhang        Eeach rank in subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
40197929ea7SJunchao Zhang        necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of rank
40297929ea7SJunchao Zhang        0, 3, 6 etc from PETSC_COMM_WORLD. So subcomm0's pieces are interleaved with pieces from other subcomms in yg.
40397929ea7SJunchao Zhang     */
4048a53a0a4SSajid Ali     if (iscuda) {
4058a53a0a4SSajid Ali       #if defined(PETSC_HAVE_CUDA)
406*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCUDAGetArray(y,&yvalue));
407*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
4088a53a0a4SSajid Ali       #endif
4098a53a0a4SSajid Ali     } else {
410*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetArray(y,&yvalue));
411*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
4128a53a0a4SSajid Ali     }
413*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectSetName((PetscObject)yg,"yg_on_subcomms")); /* Give a name to view yg clearly */
41497929ea7SJunchao Zhang 
41597929ea7SJunchao Zhang     /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
41697929ea7SJunchao Zhang        since first entry of y on this rank is from x[xstart]. From ystart, we know where ot put entries to yg.
41797929ea7SJunchao Zhang      */
418*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(y,&xstart,NULL));
419*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(yg,&ystart,NULL));
42097929ea7SJunchao Zhang 
421*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix));
422*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy));
423*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
424*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
425*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
42697929ea7SJunchao Zhang 
42797929ea7SJunchao Zhang     /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in output. */
428*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecView(yg,PETSC_VIEWER_STDOUT_WORLD));
429*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&yg));
43097929ea7SJunchao Zhang 
43197929ea7SJunchao Zhang     /* Restory yvalue so that processes in subcomm can use y from now on. */
4328a53a0a4SSajid Ali     if (iscuda) {
4338a53a0a4SSajid Ali       #if defined(PETSC_HAVE_CUDA)
434*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCUDARestoreArray(y,&yvalue));
4358a53a0a4SSajid Ali       #endif
4368a53a0a4SSajid Ali     } else {
437*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecRestoreArray(y,&yvalue));
4388a53a0a4SSajid Ali     }
439*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScale(y,3.0));
44097929ea7SJunchao Zhang 
441*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&ix)); /* One can also destroy ix, iy immediately after VecScatterCreate() */
442*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&iy));
443*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&x));
444*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&y));
445*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterDestroy(&vscat));
44697929ea7SJunchao Zhang   } /* world2subs */
44797929ea7SJunchao Zhang 
448*5f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_free(&subcomm));
44997929ea7SJunchao Zhang   ierr = PetscFinalize();
45097929ea7SJunchao Zhang   return ierr;
45197929ea7SJunchao Zhang }
45297929ea7SJunchao Zhang 
45397929ea7SJunchao Zhang /*TEST
45497929ea7SJunchao Zhang 
45597929ea7SJunchao Zhang    build:
456dfd57a17SPierre Jolivet      requires: !defined(PETSC_HAVE_MPIUNI)
457c6c723bbSStefano Zampini 
45897929ea7SJunchao Zhang    testset:
45997929ea7SJunchao Zhang      nsize: 7
46097929ea7SJunchao Zhang 
46197929ea7SJunchao Zhang      test:
46297929ea7SJunchao Zhang        suffix: 1
46397929ea7SJunchao Zhang        args: -world2sub
46497929ea7SJunchao Zhang 
46597929ea7SJunchao Zhang      test:
46697929ea7SJunchao Zhang        suffix: 2
46797929ea7SJunchao Zhang        args: -sub2sub
468f424265bSStefano Zampini        # deadlocks with NECMPI and INTELMPI (20210400300)
469f424265bSStefano Zampini        requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)
47097929ea7SJunchao Zhang 
47197929ea7SJunchao Zhang      test:
47297929ea7SJunchao Zhang        suffix: 3
47397929ea7SJunchao Zhang        args: -world2subs
47497929ea7SJunchao Zhang 
47597929ea7SJunchao Zhang      test:
47697929ea7SJunchao Zhang        suffix: 4
4778a53a0a4SSajid Ali        args: -world2sub -vectype cuda
4788a53a0a4SSajid Ali        requires: cuda
4798a53a0a4SSajid Ali 
4808a53a0a4SSajid Ali      test:
4818a53a0a4SSajid Ali        suffix: 5
4828a53a0a4SSajid Ali        args: -sub2sub -vectype cuda
4838a53a0a4SSajid Ali        requires: cuda
4848a53a0a4SSajid Ali 
4858a53a0a4SSajid Ali      test:
4868a53a0a4SSajid Ali       suffix: 6
4878a53a0a4SSajid Ali       args: -world2subs -vectype cuda
4888a53a0a4SSajid Ali       requires: cuda
4898a53a0a4SSajid Ali 
4908a53a0a4SSajid Ali      test:
4918a53a0a4SSajid Ali        suffix: 7
49297929ea7SJunchao Zhang        args: -world2sub -sf_type neighbor
49397929ea7SJunchao Zhang        output_file: output/ex9_1.out
494c6c723bbSStefano Zampini        # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !define(PETSC_HAVE_OMPI_MAJOR_VERSION)
495c6c723bbSStefano Zampini        # segfaults with NECMPI
496c6c723bbSStefano Zampini        requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)
49797929ea7SJunchao Zhang TEST*/
498