xref: /petsc/src/vec/is/sf/tests/ex9.c (revision b122ec5aa1bd4469eb4e0673542fb7de3f411254)
197929ea7SJunchao Zhang static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
297929ea7SJunchao Zhang   2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
38a53a0a4SSajid Ali   required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
48a53a0a4SSajid Ali   To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";
597929ea7SJunchao Zhang 
697929ea7SJunchao Zhang #include <petscvec.h>
797929ea7SJunchao Zhang int main(int argc,char **argv)
897929ea7SJunchao Zhang {
997929ea7SJunchao Zhang   PetscMPIInt    nproc,grank,mycolor;
1097929ea7SJunchao Zhang   PetscInt       i,n,N = 20,low,high;
1197929ea7SJunchao Zhang   MPI_Comm       subcomm;
128a53a0a4SSajid Ali   Vec            x  = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
138a53a0a4SSajid Ali   Vec            yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
1497929ea7SJunchao Zhang   VecScatter     vscat;
1597929ea7SJunchao Zhang   IS             ix,iy;
168a53a0a4SSajid Ali   PetscBool      iscuda = PETSC_FALSE;      /* Option to use VECCUDA vectors */
178a53a0a4SSajid Ali   PetscBool      optionflag, compareflag;
188a53a0a4SSajid Ali   char           vectypename[PETSC_MAX_PATH_LEN];
1997929ea7SJunchao Zhang   PetscBool      world2sub  = PETSC_FALSE;  /* Copy a vector from WORLD to a subcomm? */
2097929ea7SJunchao Zhang   PetscBool      sub2sub    = PETSC_FALSE;  /* Copy a vector from a subcomm to another subcomm? */
2197929ea7SJunchao Zhang   PetscBool      world2subs = PETSC_FALSE;  /* Copy a vector from WORLD to multiple subcomms? */
2297929ea7SJunchao Zhang 
23*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help));
245f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc));
255f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&grank));
2697929ea7SJunchao Zhang 
272c71b3e2SJacob Faibussowitsch   PetscCheckFalse(nproc < 2,PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"This test must have at least two processes to run");
2897929ea7SJunchao Zhang 
295f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetBool(NULL,0,"-world2sub",&world2sub,NULL));
305f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetBool(NULL,0,"-sub2sub",&sub2sub,NULL));
315f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetBool(NULL,0,"-world2subs",&world2subs,NULL));
325f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag));
338a53a0a4SSajid Ali   if (optionflag) {
345f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag));
358a53a0a4SSajid Ali     if (compareflag) iscuda = PETSC_TRUE;
368a53a0a4SSajid Ali   }
3797929ea7SJunchao Zhang 
3897929ea7SJunchao Zhang   /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
3997929ea7SJunchao Zhang   mycolor = grank % 3;
405f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm));
4197929ea7SJunchao Zhang 
4297929ea7SJunchao Zhang   /*===========================================================================
4397929ea7SJunchao Zhang    *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
4497929ea7SJunchao Zhang    *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
4597929ea7SJunchao Zhang    *===========================================================================*/
4697929ea7SJunchao Zhang   if (world2sub) {
475f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCreate(PETSC_COMM_WORLD, &x));
485f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
498a53a0a4SSajid Ali     if (iscuda) {
505f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(x, VECCUDA));
518a53a0a4SSajid Ali     } else {
525f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(x, VECSTANDARD));
538a53a0a4SSajid Ali     }
545f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetUp(x));
555f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectSetName((PetscObject)x,"x_commworld")); /* Give a name to view x clearly */
5697929ea7SJunchao Zhang 
5797929ea7SJunchao Zhang     /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
585f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(x,&low,&high));
5997929ea7SJunchao Zhang     for (i=low; i<high; i++) {
6097929ea7SJunchao Zhang       PetscScalar val = -i;
615f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetValue(x,i,val,INSERT_VALUES));
6297929ea7SJunchao Zhang     }
635f80ce2aSJacob Faibussowitsch     CHKERRQ(VecAssemblyBegin(x));
645f80ce2aSJacob Faibussowitsch     CHKERRQ(VecAssemblyEnd(x));
6597929ea7SJunchao Zhang 
6697929ea7SJunchao Zhang     /* Transfer x to a vector y only defined on subcomm0 and vice versa */
6797929ea7SJunchao Zhang     if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
6897929ea7SJunchao Zhang       Vec         y;
6997929ea7SJunchao Zhang       PetscScalar *yvalue;
705f80ce2aSJacob Faibussowitsch        CHKERRQ(VecCreate(subcomm, &y));
715f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
728a53a0a4SSajid Ali       if (iscuda) {
735f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(y, VECCUDA));
748a53a0a4SSajid Ali       } else {
755f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(y, VECSTANDARD));
768a53a0a4SSajid Ali       }
775f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetUp(y));
785f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)y,"y_subcomm_0")); /* Give a name to view y clearly */
795f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetLocalSize(y,&n));
808a53a0a4SSajid Ali       if (iscuda) {
818a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
825f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDAGetArray(y,&yvalue));
838a53a0a4SSajid Ali         #endif
848a53a0a4SSajid Ali       } else {
855f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArray(y,&yvalue));
868a53a0a4SSajid Ali       }
8797929ea7SJunchao Zhang       /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed by yvalue.
8897929ea7SJunchao Zhang         Note this is a collective call. All processes have to call it and supply consistent N.
8997929ea7SJunchao Zhang       */
908a53a0a4SSajid Ali       if (iscuda) {
918a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
925f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
938a53a0a4SSajid Ali         #endif
948a53a0a4SSajid Ali       } else {
955f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
968a53a0a4SSajid Ali       }
9797929ea7SJunchao Zhang 
9897929ea7SJunchao Zhang       /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
995f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetOwnershipRange(yg,&low,&high)); /* low, high are global indices */
1005f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
1015f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(ix,&iy));
10297929ea7SJunchao Zhang 
10397929ea7SJunchao Zhang       /* Union of ix's on subcomm0 covers the full range of [0,N) */
1045f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
1055f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
1065f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
10797929ea7SJunchao Zhang 
10897929ea7SJunchao Zhang       /* Once yg got the data from x, we return yvalue to y so that we can use y in other operations.
10997929ea7SJunchao Zhang         VecGetArray must be paired with VecRestoreArray.
11097929ea7SJunchao Zhang       */
1118a53a0a4SSajid Ali       if (iscuda) {
1128a53a0a4SSajid Ali          #if defined(PETSC_HAVE_CUDA)
1135f80ce2aSJacob Faibussowitsch            CHKERRQ(VecCUDARestoreArray(y,&yvalue));
1148a53a0a4SSajid Ali          #endif
1158a53a0a4SSajid Ali       } else {
1165f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(y,&yvalue));
1178a53a0a4SSajid Ali       }
11897929ea7SJunchao Zhang 
11997929ea7SJunchao Zhang       /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
1205f80ce2aSJacob Faibussowitsch       CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
1215f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScale(y,2.0));
12297929ea7SJunchao Zhang 
12397929ea7SJunchao Zhang       /* Send the new y back to x */
1245f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetArray(y,&yvalue)); /* If VecScale is done on GPU, Petsc will prepare a valid yvalue for access */
12597929ea7SJunchao Zhang       /* Supply new yvalue to yg without memory copying */
1265f80ce2aSJacob Faibussowitsch       CHKERRQ(VecPlaceArray(yg,yvalue));
1275f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
1285f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
1295f80ce2aSJacob Faibussowitsch       CHKERRQ(VecResetArray(yg));
1308a53a0a4SSajid Ali       if (iscuda) {
1318a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
1325f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDARestoreArray(y,&yvalue));
1338a53a0a4SSajid Ali         #endif
1348a53a0a4SSajid Ali       } else {
1355f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(y,&yvalue));
1368a53a0a4SSajid Ali       }
13797929ea7SJunchao Zhang 
1385f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&y));
13997929ea7SJunchao Zhang     } else {
14097929ea7SJunchao Zhang       /* Ranks outside of subcomm0 do not supply values to yg */
1418a53a0a4SSajid Ali       if (iscuda) {
1428a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
1435f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
1448a53a0a4SSajid Ali         #endif
1458a53a0a4SSajid Ali       } else {
1465f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
1478a53a0a4SSajid Ali       }
14897929ea7SJunchao Zhang 
14997929ea7SJunchao Zhang       /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
15097929ea7SJunchao Zhang         ranks just need to create empty ISes to cheat VecScatterCreate.
15197929ea7SJunchao Zhang       */
1525f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
1535f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(ix,&iy));
15497929ea7SJunchao Zhang 
1555f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
1565f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
1575f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
15897929ea7SJunchao Zhang 
15997929ea7SJunchao Zhang       /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
16097929ea7SJunchao Zhang         But they have to call VecScatterBegin/End since these routines are collective.
16197929ea7SJunchao Zhang       */
1625f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
1635f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
16497929ea7SJunchao Zhang     }
16597929ea7SJunchao Zhang 
1665f80ce2aSJacob Faibussowitsch     CHKERRQ(VecView(x,PETSC_VIEWER_STDOUT_WORLD));
1675f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&ix));
1685f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&iy));
1695f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&x));
1705f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&yg));
1715f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterDestroy(&vscat));
17297929ea7SJunchao Zhang   } /* world2sub */
17397929ea7SJunchao Zhang 
17497929ea7SJunchao Zhang   /*===========================================================================
17597929ea7SJunchao Zhang    *  Transfer a vector x defined on subcomm0 to a vector y defined on
17697929ea7SJunchao Zhang    *  subcomm1. The two subcomms are not overlapping and their union is
17797929ea7SJunchao Zhang    *  not necessarily equal to PETSC_COMM_WORLD.
17897929ea7SJunchao Zhang    *===========================================================================*/
17997929ea7SJunchao Zhang   if (sub2sub) {
18097929ea7SJunchao Zhang     if (mycolor == 0) {
18197929ea7SJunchao Zhang       /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
18297929ea7SJunchao Zhang       PetscInt          n,N = 22;
18397929ea7SJunchao Zhang       Vec               x,xg,yg;
18497929ea7SJunchao Zhang       IS                ix,iy;
18597929ea7SJunchao Zhang       VecScatter        vscat;
18697929ea7SJunchao Zhang       const PetscScalar *xvalue;
18797929ea7SJunchao Zhang       MPI_Comm          intercomm,parentcomm;
18897929ea7SJunchao Zhang       PetscMPIInt       lrank;
18997929ea7SJunchao Zhang 
1905f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_rank(subcomm,&lrank));
1918a53a0a4SSajid Ali       /* x is on subcomm */
1925f80ce2aSJacob Faibussowitsch       CHKERRQ(VecCreate(subcomm, &x));
1935f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
1948a53a0a4SSajid Ali       if (iscuda) {
1955f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(x, VECCUDA));
1968a53a0a4SSajid Ali       } else {
1975f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(x, VECSTANDARD));
1988a53a0a4SSajid Ali       }
1995f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetUp(x));
2005f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetOwnershipRange(x,&low,&high));
20197929ea7SJunchao Zhang 
20297929ea7SJunchao Zhang       /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
20397929ea7SJunchao Zhang       for (i=low; i<high; i++) {
20497929ea7SJunchao Zhang         PetscScalar val = i;
2055f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetValue(x,i,val,INSERT_VALUES));
20697929ea7SJunchao Zhang       }
2075f80ce2aSJacob Faibussowitsch       CHKERRQ(VecAssemblyBegin(x));
2085f80ce2aSJacob Faibussowitsch       CHKERRQ(VecAssemblyEnd(x));
20997929ea7SJunchao Zhang 
2105f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm));
21197929ea7SJunchao Zhang 
21297929ea7SJunchao Zhang       /* Tell rank 0 of subcomm1 the global size of x */
2135f80ce2aSJacob Faibussowitsch       if (!lrank) CHKERRMPI(MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm));
21497929ea7SJunchao Zhang 
21597929ea7SJunchao Zhang       /* Create an intracomm Petsc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
21697929ea7SJunchao Zhang         But this order actually does not matter, since what we care is vector y, which is defined on subcomm1.
21797929ea7SJunchao Zhang       */
2185f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm));
21997929ea7SJunchao Zhang 
22097929ea7SJunchao Zhang       /* Create a vector xg on parentcomm, which shares memory with x */
2215f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetLocalSize(x,&n));
2228a53a0a4SSajid Ali       if (iscuda) {
2238a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
2245f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDAGetArrayRead(x,&xvalue));
2255f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg));
2268a53a0a4SSajid Ali         #endif
2278a53a0a4SSajid Ali       } else {
2285f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArrayRead(x,&xvalue));
2295f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg));
2308a53a0a4SSajid Ali       }
23197929ea7SJunchao Zhang 
23297929ea7SJunchao Zhang       /* Ranks in subcomm 0 have nothing on yg, so they simply have n=0, array=NULL */
2338a53a0a4SSajid Ali       if (iscuda) {
2348a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
2355f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
2368a53a0a4SSajid Ali         #endif
2378a53a0a4SSajid Ali       } else {
2385f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
2398a53a0a4SSajid Ali       }
24097929ea7SJunchao Zhang 
24197929ea7SJunchao Zhang       /* Create the vecscatter, which does identity map by setting yg[i] = xg[i], i=0..N-1. */
2425f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetOwnershipRange(xg,&low,&high)); /* low, high are global indices of xg */
2435f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
2445f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(ix,&iy));
2455f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterCreate(xg,ix,yg,iy,&vscat));
24697929ea7SJunchao Zhang 
24797929ea7SJunchao Zhang       /* Scatter values from xg to yg */
2485f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
2495f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
25097929ea7SJunchao Zhang 
25197929ea7SJunchao Zhang       /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
2528a53a0a4SSajid Ali       if (iscuda) {
2538a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
2545f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDARestoreArrayRead(x,&xvalue));
2558a53a0a4SSajid Ali         #endif
2568a53a0a4SSajid Ali       } else {
2575f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArrayRead(x,&xvalue));
2588a53a0a4SSajid Ali       }
2595f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&x));
2605f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&ix));
2615f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&iy));
2625f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&xg));
2635f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&yg));
2645f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterDestroy(&vscat));
2655f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_free(&intercomm));
2665f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_free(&parentcomm));
26797929ea7SJunchao Zhang     } else if (mycolor == 1) { /* subcomm 1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
26897929ea7SJunchao Zhang       PetscInt    n,N;
26997929ea7SJunchao Zhang       Vec         y,xg,yg;
27097929ea7SJunchao Zhang       IS          ix,iy;
27197929ea7SJunchao Zhang       VecScatter  vscat;
27297929ea7SJunchao Zhang       PetscScalar *yvalue;
27397929ea7SJunchao Zhang       MPI_Comm    intercomm,parentcomm;
27497929ea7SJunchao Zhang       PetscMPIInt lrank;
27597929ea7SJunchao Zhang 
2765f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_rank(subcomm,&lrank));
2775f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm));
27897929ea7SJunchao Zhang 
27997929ea7SJunchao Zhang       /* Two rank-0 are talking */
2805f80ce2aSJacob Faibussowitsch       if (!lrank) CHKERRMPI(MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE));
28197929ea7SJunchao Zhang       /* Rank 0 of subcomm1 bcasts N to its members */
2825f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm));
28397929ea7SJunchao Zhang 
28497929ea7SJunchao Zhang       /* Create a intracomm Petsc can work on */
2855f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm));
28697929ea7SJunchao Zhang 
28797929ea7SJunchao Zhang       /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/
2888a53a0a4SSajid Ali       if (iscuda) {
2898a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
2905f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
2918a53a0a4SSajid Ali         #endif
2928a53a0a4SSajid Ali       } else {
2935f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
2948a53a0a4SSajid Ali       }
29597929ea7SJunchao Zhang 
2965f80ce2aSJacob Faibussowitsch       CHKERRQ(VecCreate(subcomm, &y));
2975f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
2988a53a0a4SSajid Ali       if (iscuda) {
2995f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(y, VECCUDA));
3008a53a0a4SSajid Ali       } else {
3015f80ce2aSJacob Faibussowitsch         CHKERRQ(VecSetType(y, VECSTANDARD));
3028a53a0a4SSajid Ali       }
3035f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetUp(y));
3048a53a0a4SSajid Ali 
3055f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)y,"y_subcomm_1")); /* Give a name to view y clearly */
3065f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetLocalSize(y,&n));
3078a53a0a4SSajid Ali       if (iscuda) {
3088a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
3095f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDAGetArray(y,&yvalue));
3108a53a0a4SSajid Ali         #endif
3118a53a0a4SSajid Ali       } else {
3125f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArray(y,&yvalue));
3138a53a0a4SSajid Ali       }
31497929ea7SJunchao Zhang       /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
31597929ea7SJunchao Zhang         created in the same order in subcomm0/1. For example, we can not reverse the order of
31697929ea7SJunchao Zhang         creating xg and yg in subcomm1.
31797929ea7SJunchao Zhang       */
3188a53a0a4SSajid Ali       if (iscuda) {
3198a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
3205f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
3218a53a0a4SSajid Ali         #endif
3228a53a0a4SSajid Ali       } else {
3235f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
3248a53a0a4SSajid Ali       }
32597929ea7SJunchao Zhang 
32697929ea7SJunchao Zhang       /* Ranks in subcomm0 already specified the full range of the identity map.
32797929ea7SJunchao Zhang         ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
32897929ea7SJunchao Zhang       */
3295f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
3305f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(ix,&iy));
3315f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterCreate(xg,ix,yg,iy,&vscat));
33297929ea7SJunchao Zhang 
33397929ea7SJunchao Zhang       /* Scatter values from xg to yg */
3345f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
3355f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
33697929ea7SJunchao Zhang 
33797929ea7SJunchao Zhang       /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */
3388a53a0a4SSajid Ali       if (iscuda) {
3398a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
3405f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCUDARestoreArray(y,&yvalue));
3418a53a0a4SSajid Ali         #endif
3428a53a0a4SSajid Ali       } else {
3435f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(y,&yvalue));
3448a53a0a4SSajid Ali       }
34597929ea7SJunchao Zhang 
34697929ea7SJunchao Zhang       /* Libraries on subcomm1 can safely use y now, for example, view it */
3475f80ce2aSJacob Faibussowitsch       CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
34897929ea7SJunchao Zhang 
3495f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&y));
3505f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&ix));
3515f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&iy));
3525f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&xg));
3535f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&yg));
3545f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterDestroy(&vscat));
3555f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_free(&intercomm));
3565f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Comm_free(&parentcomm));
35797929ea7SJunchao Zhang     } else if (mycolor == 2) { /* subcomm2 */
35897929ea7SJunchao Zhang       /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
35997929ea7SJunchao Zhang     }
36097929ea7SJunchao Zhang   } /* sub2sub */
36197929ea7SJunchao Zhang 
36297929ea7SJunchao Zhang   /*===========================================================================
36397929ea7SJunchao Zhang    *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
36497929ea7SJunchao Zhang    *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
36597929ea7SJunchao Zhang    *  as we did in case 1, but that is not efficient. Instead, we use one vecscatter
36697929ea7SJunchao Zhang    *  to achieve that.
36797929ea7SJunchao Zhang    *===========================================================================*/
36897929ea7SJunchao Zhang   if (world2subs) {
3698a53a0a4SSajid Ali     Vec         y;
37097929ea7SJunchao Zhang     PetscInt    n,N=15,xstart,ystart,low,high;
37197929ea7SJunchao Zhang     PetscScalar *yvalue;
37297929ea7SJunchao Zhang 
37397929ea7SJunchao Zhang     /* Initialize x to [0, 1, 2, 3, ..., N-1] */
3745f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCreate(PETSC_COMM_WORLD, &x));
3755f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetSizes(x, PETSC_DECIDE, N));
3768a53a0a4SSajid Ali     if (iscuda) {
3775f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(x, VECCUDA));
3788a53a0a4SSajid Ali     } else {
3795f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(x, VECSTANDARD));
3808a53a0a4SSajid Ali     }
3815f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetUp(x));
3825f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(x,&low,&high));
3835f80ce2aSJacob Faibussowitsch     for (i=low; i<high; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
3845f80ce2aSJacob Faibussowitsch     CHKERRQ(VecAssemblyBegin(x));
3855f80ce2aSJacob Faibussowitsch     CHKERRQ(VecAssemblyEnd(x));
38697929ea7SJunchao Zhang 
38797929ea7SJunchao Zhang     /* Every subcomm has a y as long as x */
3885f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCreate(subcomm, &y));
3895f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetSizes(y, PETSC_DECIDE, N));
3908a53a0a4SSajid Ali     if (iscuda) {
3915f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(y, VECCUDA));
3928a53a0a4SSajid Ali     } else {
3935f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSetType(y, VECSTANDARD));
3948a53a0a4SSajid Ali     }
3955f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSetUp(y));
3965f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetLocalSize(y,&n));
39797929ea7SJunchao Zhang 
39897929ea7SJunchao Zhang     /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
39997929ea7SJunchao Zhang        Eeach rank in subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
40097929ea7SJunchao Zhang        necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of rank
40197929ea7SJunchao Zhang        0, 3, 6 etc from PETSC_COMM_WORLD. So subcomm0's pieces are interleaved with pieces from other subcomms in yg.
40297929ea7SJunchao Zhang     */
4038a53a0a4SSajid Ali     if (iscuda) {
4048a53a0a4SSajid Ali       #if defined(PETSC_HAVE_CUDA)
4055f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCUDAGetArray(y,&yvalue));
4065f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
4078a53a0a4SSajid Ali       #endif
4088a53a0a4SSajid Ali     } else {
4095f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetArray(y,&yvalue));
4105f80ce2aSJacob Faibussowitsch       CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
4118a53a0a4SSajid Ali     }
4125f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectSetName((PetscObject)yg,"yg_on_subcomms")); /* Give a name to view yg clearly */
41397929ea7SJunchao Zhang 
41497929ea7SJunchao Zhang     /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
41597929ea7SJunchao Zhang        since first entry of y on this rank is from x[xstart]. From ystart, we know where ot put entries to yg.
41697929ea7SJunchao Zhang      */
4175f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(y,&xstart,NULL));
4185f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(yg,&ystart,NULL));
41997929ea7SJunchao Zhang 
4205f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix));
4215f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy));
4225f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterCreate(x,ix,yg,iy,&vscat));
4235f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
4245f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
42597929ea7SJunchao Zhang 
42697929ea7SJunchao Zhang     /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in output. */
4275f80ce2aSJacob Faibussowitsch     CHKERRQ(VecView(yg,PETSC_VIEWER_STDOUT_WORLD));
4285f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&yg));
42997929ea7SJunchao Zhang 
43097929ea7SJunchao Zhang     /* Restory yvalue so that processes in subcomm can use y from now on. */
4318a53a0a4SSajid Ali     if (iscuda) {
4328a53a0a4SSajid Ali       #if defined(PETSC_HAVE_CUDA)
4335f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCUDARestoreArray(y,&yvalue));
4348a53a0a4SSajid Ali       #endif
4358a53a0a4SSajid Ali     } else {
4365f80ce2aSJacob Faibussowitsch       CHKERRQ(VecRestoreArray(y,&yvalue));
4378a53a0a4SSajid Ali     }
4385f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScale(y,3.0));
43997929ea7SJunchao Zhang 
4405f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&ix)); /* One can also destroy ix, iy immediately after VecScatterCreate() */
4415f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&iy));
4425f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&x));
4435f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&y));
4445f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterDestroy(&vscat));
44597929ea7SJunchao Zhang   } /* world2subs */
44697929ea7SJunchao Zhang 
4475f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_free(&subcomm));
448*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscFinalize());
449*b122ec5aSJacob Faibussowitsch   return 0;
45097929ea7SJunchao Zhang }
45197929ea7SJunchao Zhang 
45297929ea7SJunchao Zhang /*TEST
45397929ea7SJunchao Zhang 
45497929ea7SJunchao Zhang    build:
455dfd57a17SPierre Jolivet      requires: !defined(PETSC_HAVE_MPIUNI)
456c6c723bbSStefano Zampini 
45797929ea7SJunchao Zhang    testset:
45897929ea7SJunchao Zhang      nsize: 7
45997929ea7SJunchao Zhang 
46097929ea7SJunchao Zhang      test:
46197929ea7SJunchao Zhang        suffix: 1
46297929ea7SJunchao Zhang        args: -world2sub
46397929ea7SJunchao Zhang 
46497929ea7SJunchao Zhang      test:
46597929ea7SJunchao Zhang        suffix: 2
46697929ea7SJunchao Zhang        args: -sub2sub
467f424265bSStefano Zampini        # deadlocks with NECMPI and INTELMPI (20210400300)
468f424265bSStefano Zampini        requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)
46997929ea7SJunchao Zhang 
47097929ea7SJunchao Zhang      test:
47197929ea7SJunchao Zhang        suffix: 3
47297929ea7SJunchao Zhang        args: -world2subs
47397929ea7SJunchao Zhang 
47497929ea7SJunchao Zhang      test:
47597929ea7SJunchao Zhang        suffix: 4
4768a53a0a4SSajid Ali        args: -world2sub -vectype cuda
4778a53a0a4SSajid Ali        requires: cuda
4788a53a0a4SSajid Ali 
4798a53a0a4SSajid Ali      test:
4808a53a0a4SSajid Ali        suffix: 5
4818a53a0a4SSajid Ali        args: -sub2sub -vectype cuda
4828a53a0a4SSajid Ali        requires: cuda
4838a53a0a4SSajid Ali 
4848a53a0a4SSajid Ali      test:
4858a53a0a4SSajid Ali       suffix: 6
4868a53a0a4SSajid Ali       args: -world2subs -vectype cuda
4878a53a0a4SSajid Ali       requires: cuda
4888a53a0a4SSajid Ali 
4898a53a0a4SSajid Ali      test:
4908a53a0a4SSajid Ali        suffix: 7
49197929ea7SJunchao Zhang        args: -world2sub -sf_type neighbor
49297929ea7SJunchao Zhang        output_file: output/ex9_1.out
493c6c723bbSStefano Zampini        # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !define(PETSC_HAVE_OMPI_MAJOR_VERSION)
494c6c723bbSStefano Zampini        # segfaults with NECMPI
495c6c723bbSStefano Zampini        requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)
49697929ea7SJunchao Zhang TEST*/
497