xref: /petsc/src/vec/is/sf/tests/ex9.c (revision 08401ef684002a709c6d3db98a0c9f54a8bcf1ec)
197929ea7SJunchao Zhang static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
297929ea7SJunchao Zhang   2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
38a53a0a4SSajid Ali   required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
48a53a0a4SSajid Ali   To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";
597929ea7SJunchao Zhang 
697929ea7SJunchao Zhang #include <petscvec.h>
797929ea7SJunchao Zhang int main(int argc,char **argv)
897929ea7SJunchao Zhang {
997929ea7SJunchao Zhang   PetscMPIInt    nproc,grank,mycolor;
1097929ea7SJunchao Zhang   PetscInt       i,n,N = 20,low,high;
1197929ea7SJunchao Zhang   MPI_Comm       subcomm;
128a53a0a4SSajid Ali   Vec            x  = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
138a53a0a4SSajid Ali   Vec            yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
1497929ea7SJunchao Zhang   VecScatter     vscat;
1597929ea7SJunchao Zhang   IS             ix,iy;
168a53a0a4SSajid Ali   PetscBool      iscuda = PETSC_FALSE;      /* Option to use VECCUDA vectors */
178a53a0a4SSajid Ali   PetscBool      optionflag, compareflag;
188a53a0a4SSajid Ali   char           vectypename[PETSC_MAX_PATH_LEN];
1997929ea7SJunchao Zhang   PetscBool      world2sub  = PETSC_FALSE;  /* Copy a vector from WORLD to a subcomm? */
2097929ea7SJunchao Zhang   PetscBool      sub2sub    = PETSC_FALSE;  /* Copy a vector from a subcomm to another subcomm? */
2197929ea7SJunchao Zhang   PetscBool      world2subs = PETSC_FALSE;  /* Copy a vector from WORLD to multiple subcomms? */
2297929ea7SJunchao Zhang 
239566063dSJacob Faibussowitsch   PetscCall(PetscInitialize(&argc,&argv,(char*)0,help));
249566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc));
259566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&grank));
2697929ea7SJunchao Zhang 
27*08401ef6SPierre Jolivet   PetscCheck(nproc >= 2,PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"This test must have at least two processes to run");
2897929ea7SJunchao Zhang 
299566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL,0,"-world2sub",&world2sub,NULL));
309566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL,0,"-sub2sub",&sub2sub,NULL));
319566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL,0,"-world2subs",&world2subs,NULL));
329566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag));
338a53a0a4SSajid Ali   if (optionflag) {
349566063dSJacob Faibussowitsch     PetscCall(PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag));
358a53a0a4SSajid Ali     if (compareflag) iscuda = PETSC_TRUE;
368a53a0a4SSajid Ali   }
3797929ea7SJunchao Zhang 
3897929ea7SJunchao Zhang   /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
3997929ea7SJunchao Zhang   mycolor = grank % 3;
409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm));
4197929ea7SJunchao Zhang 
4297929ea7SJunchao Zhang   /*===========================================================================
4397929ea7SJunchao Zhang    *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
4497929ea7SJunchao Zhang    *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
4597929ea7SJunchao Zhang    *===========================================================================*/
4697929ea7SJunchao Zhang   if (world2sub) {
479566063dSJacob Faibussowitsch     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
489566063dSJacob Faibussowitsch     PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
498a53a0a4SSajid Ali     if (iscuda) {
509566063dSJacob Faibussowitsch       PetscCall(VecSetType(x, VECCUDA));
518a53a0a4SSajid Ali     } else {
529566063dSJacob Faibussowitsch       PetscCall(VecSetType(x, VECSTANDARD));
538a53a0a4SSajid Ali     }
549566063dSJacob Faibussowitsch     PetscCall(VecSetUp(x));
559566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)x,"x_commworld")); /* Give a name to view x clearly */
5697929ea7SJunchao Zhang 
5797929ea7SJunchao Zhang     /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
589566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(x,&low,&high));
5997929ea7SJunchao Zhang     for (i=low; i<high; i++) {
6097929ea7SJunchao Zhang       PetscScalar val = -i;
619566063dSJacob Faibussowitsch       PetscCall(VecSetValue(x,i,val,INSERT_VALUES));
6297929ea7SJunchao Zhang     }
639566063dSJacob Faibussowitsch     PetscCall(VecAssemblyBegin(x));
649566063dSJacob Faibussowitsch     PetscCall(VecAssemblyEnd(x));
6597929ea7SJunchao Zhang 
6697929ea7SJunchao Zhang     /* Transfer x to a vector y only defined on subcomm0 and vice versa */
6797929ea7SJunchao Zhang     if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
6897929ea7SJunchao Zhang       Vec         y;
6997929ea7SJunchao Zhang       PetscScalar *yvalue;
709566063dSJacob Faibussowitsch        PetscCall(VecCreate(subcomm, &y));
719566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
728a53a0a4SSajid Ali       if (iscuda) {
739566063dSJacob Faibussowitsch         PetscCall(VecSetType(y, VECCUDA));
748a53a0a4SSajid Ali       } else {
759566063dSJacob Faibussowitsch         PetscCall(VecSetType(y, VECSTANDARD));
768a53a0a4SSajid Ali       }
779566063dSJacob Faibussowitsch       PetscCall(VecSetUp(y));
789566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)y,"y_subcomm_0")); /* Give a name to view y clearly */
799566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(y,&n));
808a53a0a4SSajid Ali       if (iscuda) {
818a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
829566063dSJacob Faibussowitsch           PetscCall(VecCUDAGetArray(y,&yvalue));
838a53a0a4SSajid Ali         #endif
848a53a0a4SSajid Ali       } else {
859566063dSJacob Faibussowitsch         PetscCall(VecGetArray(y,&yvalue));
868a53a0a4SSajid Ali       }
8797929ea7SJunchao Zhang       /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed by yvalue.
8897929ea7SJunchao Zhang         Note this is a collective call. All processes have to call it and supply consistent N.
8997929ea7SJunchao Zhang       */
908a53a0a4SSajid Ali       if (iscuda) {
918a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
929566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
938a53a0a4SSajid Ali         #endif
948a53a0a4SSajid Ali       } else {
959566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg));
968a53a0a4SSajid Ali       }
9797929ea7SJunchao Zhang 
9897929ea7SJunchao Zhang       /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
999566063dSJacob Faibussowitsch       PetscCall(VecGetOwnershipRange(yg,&low,&high)); /* low, high are global indices */
1009566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
1019566063dSJacob Faibussowitsch       PetscCall(ISDuplicate(ix,&iy));
10297929ea7SJunchao Zhang 
10397929ea7SJunchao Zhang       /* Union of ix's on subcomm0 covers the full range of [0,N) */
1049566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat));
1059566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
1069566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
10797929ea7SJunchao Zhang 
10897929ea7SJunchao Zhang       /* Once yg got the data from x, we return yvalue to y so that we can use y in other operations.
10997929ea7SJunchao Zhang         VecGetArray must be paired with VecRestoreArray.
11097929ea7SJunchao Zhang       */
1118a53a0a4SSajid Ali       if (iscuda) {
1128a53a0a4SSajid Ali          #if defined(PETSC_HAVE_CUDA)
1139566063dSJacob Faibussowitsch            PetscCall(VecCUDARestoreArray(y,&yvalue));
1148a53a0a4SSajid Ali          #endif
1158a53a0a4SSajid Ali       } else {
1169566063dSJacob Faibussowitsch         PetscCall(VecRestoreArray(y,&yvalue));
1178a53a0a4SSajid Ali       }
11897929ea7SJunchao Zhang 
11997929ea7SJunchao Zhang       /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
1209566063dSJacob Faibussowitsch       PetscCall(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
1219566063dSJacob Faibussowitsch       PetscCall(VecScale(y,2.0));
12297929ea7SJunchao Zhang 
12397929ea7SJunchao Zhang       /* Send the new y back to x */
1249566063dSJacob Faibussowitsch       PetscCall(VecGetArray(y,&yvalue)); /* If VecScale is done on GPU, Petsc will prepare a valid yvalue for access */
12597929ea7SJunchao Zhang       /* Supply new yvalue to yg without memory copying */
1269566063dSJacob Faibussowitsch       PetscCall(VecPlaceArray(yg,yvalue));
1279566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
1289566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
1299566063dSJacob Faibussowitsch       PetscCall(VecResetArray(yg));
1308a53a0a4SSajid Ali       if (iscuda) {
1318a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
1329566063dSJacob Faibussowitsch           PetscCall(VecCUDARestoreArray(y,&yvalue));
1338a53a0a4SSajid Ali         #endif
1348a53a0a4SSajid Ali       } else {
1359566063dSJacob Faibussowitsch         PetscCall(VecRestoreArray(y,&yvalue));
1368a53a0a4SSajid Ali       }
13797929ea7SJunchao Zhang 
1389566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&y));
13997929ea7SJunchao Zhang     } else {
14097929ea7SJunchao Zhang       /* Ranks outside of subcomm0 do not supply values to yg */
1418a53a0a4SSajid Ali       if (iscuda) {
1428a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
1439566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
1448a53a0a4SSajid Ali         #endif
1458a53a0a4SSajid Ali       } else {
1469566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg));
1478a53a0a4SSajid Ali       }
14897929ea7SJunchao Zhang 
14997929ea7SJunchao Zhang       /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
15097929ea7SJunchao Zhang         ranks just need to create empty ISes to cheat VecScatterCreate.
15197929ea7SJunchao Zhang       */
1529566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
1539566063dSJacob Faibussowitsch       PetscCall(ISDuplicate(ix,&iy));
15497929ea7SJunchao Zhang 
1559566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat));
1569566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
1579566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
15897929ea7SJunchao Zhang 
15997929ea7SJunchao Zhang       /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
16097929ea7SJunchao Zhang         But they have to call VecScatterBegin/End since these routines are collective.
16197929ea7SJunchao Zhang       */
1629566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
1639566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE));
16497929ea7SJunchao Zhang     }
16597929ea7SJunchao Zhang 
1669566063dSJacob Faibussowitsch     PetscCall(VecView(x,PETSC_VIEWER_STDOUT_WORLD));
1679566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&ix));
1689566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iy));
1699566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&x));
1709566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&yg));
1719566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vscat));
17297929ea7SJunchao Zhang   } /* world2sub */
17397929ea7SJunchao Zhang 
17497929ea7SJunchao Zhang   /*===========================================================================
17597929ea7SJunchao Zhang    *  Transfer a vector x defined on subcomm0 to a vector y defined on
17697929ea7SJunchao Zhang    *  subcomm1. The two subcomms are not overlapping and their union is
17797929ea7SJunchao Zhang    *  not necessarily equal to PETSC_COMM_WORLD.
17897929ea7SJunchao Zhang    *===========================================================================*/
17997929ea7SJunchao Zhang   if (sub2sub) {
18097929ea7SJunchao Zhang     if (mycolor == 0) {
18197929ea7SJunchao Zhang       /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
18297929ea7SJunchao Zhang       PetscInt          n,N = 22;
18397929ea7SJunchao Zhang       Vec               x,xg,yg;
18497929ea7SJunchao Zhang       IS                ix,iy;
18597929ea7SJunchao Zhang       VecScatter        vscat;
18697929ea7SJunchao Zhang       const PetscScalar *xvalue;
18797929ea7SJunchao Zhang       MPI_Comm          intercomm,parentcomm;
18897929ea7SJunchao Zhang       PetscMPIInt       lrank;
18997929ea7SJunchao Zhang 
1909566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(subcomm,&lrank));
1918a53a0a4SSajid Ali       /* x is on subcomm */
1929566063dSJacob Faibussowitsch       PetscCall(VecCreate(subcomm, &x));
1939566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
1948a53a0a4SSajid Ali       if (iscuda) {
1959566063dSJacob Faibussowitsch         PetscCall(VecSetType(x, VECCUDA));
1968a53a0a4SSajid Ali       } else {
1979566063dSJacob Faibussowitsch         PetscCall(VecSetType(x, VECSTANDARD));
1988a53a0a4SSajid Ali       }
1999566063dSJacob Faibussowitsch       PetscCall(VecSetUp(x));
2009566063dSJacob Faibussowitsch       PetscCall(VecGetOwnershipRange(x,&low,&high));
20197929ea7SJunchao Zhang 
20297929ea7SJunchao Zhang       /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
20397929ea7SJunchao Zhang       for (i=low; i<high; i++) {
20497929ea7SJunchao Zhang         PetscScalar val = i;
2059566063dSJacob Faibussowitsch         PetscCall(VecSetValue(x,i,val,INSERT_VALUES));
20697929ea7SJunchao Zhang       }
2079566063dSJacob Faibussowitsch       PetscCall(VecAssemblyBegin(x));
2089566063dSJacob Faibussowitsch       PetscCall(VecAssemblyEnd(x));
20997929ea7SJunchao Zhang 
2109566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm));
21197929ea7SJunchao Zhang 
21297929ea7SJunchao Zhang       /* Tell rank 0 of subcomm1 the global size of x */
2139566063dSJacob Faibussowitsch       if (!lrank) PetscCallMPI(MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm));
21497929ea7SJunchao Zhang 
21597929ea7SJunchao Zhang       /* Create an intracomm Petsc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
21697929ea7SJunchao Zhang         But this order actually does not matter, since what we care is vector y, which is defined on subcomm1.
21797929ea7SJunchao Zhang       */
2189566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm));
21997929ea7SJunchao Zhang 
22097929ea7SJunchao Zhang       /* Create a vector xg on parentcomm, which shares memory with x */
2219566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(x,&n));
2228a53a0a4SSajid Ali       if (iscuda) {
2238a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
2249566063dSJacob Faibussowitsch           PetscCall(VecCUDAGetArrayRead(x,&xvalue));
2259566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg));
2268a53a0a4SSajid Ali         #endif
2278a53a0a4SSajid Ali       } else {
2289566063dSJacob Faibussowitsch         PetscCall(VecGetArrayRead(x,&xvalue));
2299566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg));
2308a53a0a4SSajid Ali       }
23197929ea7SJunchao Zhang 
23297929ea7SJunchao Zhang       /* Ranks in subcomm 0 have nothing on yg, so they simply have n=0, array=NULL */
2338a53a0a4SSajid Ali       if (iscuda) {
2348a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
2359566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
2368a53a0a4SSajid Ali         #endif
2378a53a0a4SSajid Ali       } else {
2389566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg));
2398a53a0a4SSajid Ali       }
24097929ea7SJunchao Zhang 
24197929ea7SJunchao Zhang       /* Create the vecscatter, which does identity map by setting yg[i] = xg[i], i=0..N-1. */
2429566063dSJacob Faibussowitsch       PetscCall(VecGetOwnershipRange(xg,&low,&high)); /* low, high are global indices of xg */
2439566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix));
2449566063dSJacob Faibussowitsch       PetscCall(ISDuplicate(ix,&iy));
2459566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(xg,ix,yg,iy,&vscat));
24697929ea7SJunchao Zhang 
24797929ea7SJunchao Zhang       /* Scatter values from xg to yg */
2489566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
2499566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
25097929ea7SJunchao Zhang 
25197929ea7SJunchao Zhang       /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
2528a53a0a4SSajid Ali       if (iscuda) {
2538a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
2549566063dSJacob Faibussowitsch           PetscCall(VecCUDARestoreArrayRead(x,&xvalue));
2558a53a0a4SSajid Ali         #endif
2568a53a0a4SSajid Ali       } else {
2579566063dSJacob Faibussowitsch         PetscCall(VecRestoreArrayRead(x,&xvalue));
2588a53a0a4SSajid Ali       }
2599566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&x));
2609566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&ix));
2619566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iy));
2629566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&xg));
2639566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&yg));
2649566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vscat));
2659566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&intercomm));
2669566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&parentcomm));
26797929ea7SJunchao Zhang     } else if (mycolor == 1) { /* subcomm 1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
26897929ea7SJunchao Zhang       PetscInt    n,N;
26997929ea7SJunchao Zhang       Vec         y,xg,yg;
27097929ea7SJunchao Zhang       IS          ix,iy;
27197929ea7SJunchao Zhang       VecScatter  vscat;
27297929ea7SJunchao Zhang       PetscScalar *yvalue;
27397929ea7SJunchao Zhang       MPI_Comm    intercomm,parentcomm;
27497929ea7SJunchao Zhang       PetscMPIInt lrank;
27597929ea7SJunchao Zhang 
2769566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(subcomm,&lrank));
2779566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm));
27897929ea7SJunchao Zhang 
27997929ea7SJunchao Zhang       /* Two rank-0 are talking */
2809566063dSJacob Faibussowitsch       if (!lrank) PetscCallMPI(MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE));
28197929ea7SJunchao Zhang       /* Rank 0 of subcomm1 bcasts N to its members */
2829566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm));
28397929ea7SJunchao Zhang 
28497929ea7SJunchao Zhang       /* Create a intracomm Petsc can work on */
2859566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm));
28697929ea7SJunchao Zhang 
28797929ea7SJunchao Zhang       /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/
2888a53a0a4SSajid Ali       if (iscuda) {
2898a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
2909566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
2918a53a0a4SSajid Ali         #endif
2928a53a0a4SSajid Ali       } else {
2939566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg));
2948a53a0a4SSajid Ali       }
29597929ea7SJunchao Zhang 
2969566063dSJacob Faibussowitsch       PetscCall(VecCreate(subcomm, &y));
2979566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
2988a53a0a4SSajid Ali       if (iscuda) {
2999566063dSJacob Faibussowitsch         PetscCall(VecSetType(y, VECCUDA));
3008a53a0a4SSajid Ali       } else {
3019566063dSJacob Faibussowitsch         PetscCall(VecSetType(y, VECSTANDARD));
3028a53a0a4SSajid Ali       }
3039566063dSJacob Faibussowitsch       PetscCall(VecSetUp(y));
3048a53a0a4SSajid Ali 
3059566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)y,"y_subcomm_1")); /* Give a name to view y clearly */
3069566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(y,&n));
3078a53a0a4SSajid Ali       if (iscuda) {
3088a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
3099566063dSJacob Faibussowitsch           PetscCall(VecCUDAGetArray(y,&yvalue));
3108a53a0a4SSajid Ali         #endif
3118a53a0a4SSajid Ali       } else {
3129566063dSJacob Faibussowitsch         PetscCall(VecGetArray(y,&yvalue));
3138a53a0a4SSajid Ali       }
31497929ea7SJunchao Zhang       /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
31597929ea7SJunchao Zhang         created in the same order in subcomm0/1. For example, we can not reverse the order of
31697929ea7SJunchao Zhang         creating xg and yg in subcomm1.
31797929ea7SJunchao Zhang       */
3188a53a0a4SSajid Ali       if (iscuda) {
3198a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
3209566063dSJacob Faibussowitsch           PetscCall(VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
3218a53a0a4SSajid Ali         #endif
3228a53a0a4SSajid Ali       } else {
3239566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg));
3248a53a0a4SSajid Ali       }
32597929ea7SJunchao Zhang 
32697929ea7SJunchao Zhang       /* Ranks in subcomm0 already specified the full range of the identity map.
32797929ea7SJunchao Zhang         ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
32897929ea7SJunchao Zhang       */
3299566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix));
3309566063dSJacob Faibussowitsch       PetscCall(ISDuplicate(ix,&iy));
3319566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(xg,ix,yg,iy,&vscat));
33297929ea7SJunchao Zhang 
33397929ea7SJunchao Zhang       /* Scatter values from xg to yg */
3349566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
3359566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD));
33697929ea7SJunchao Zhang 
33797929ea7SJunchao Zhang       /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */
3388a53a0a4SSajid Ali       if (iscuda) {
3398a53a0a4SSajid Ali         #if defined(PETSC_HAVE_CUDA)
3409566063dSJacob Faibussowitsch           PetscCall(VecCUDARestoreArray(y,&yvalue));
3418a53a0a4SSajid Ali         #endif
3428a53a0a4SSajid Ali       } else {
3439566063dSJacob Faibussowitsch         PetscCall(VecRestoreArray(y,&yvalue));
3448a53a0a4SSajid Ali       }
34597929ea7SJunchao Zhang 
34697929ea7SJunchao Zhang       /* Libraries on subcomm1 can safely use y now, for example, view it */
3479566063dSJacob Faibussowitsch       PetscCall(VecView(y,PETSC_VIEWER_STDOUT_(subcomm)));
34897929ea7SJunchao Zhang 
3499566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&y));
3509566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&ix));
3519566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iy));
3529566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&xg));
3539566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&yg));
3549566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vscat));
3559566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&intercomm));
3569566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&parentcomm));
35797929ea7SJunchao Zhang     } else if (mycolor == 2) { /* subcomm2 */
35897929ea7SJunchao Zhang       /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
35997929ea7SJunchao Zhang     }
36097929ea7SJunchao Zhang   } /* sub2sub */
36197929ea7SJunchao Zhang 
36297929ea7SJunchao Zhang   /*===========================================================================
36397929ea7SJunchao Zhang    *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
36497929ea7SJunchao Zhang    *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
36597929ea7SJunchao Zhang    *  as we did in case 1, but that is not efficient. Instead, we use one vecscatter
36697929ea7SJunchao Zhang    *  to achieve that.
36797929ea7SJunchao Zhang    *===========================================================================*/
36897929ea7SJunchao Zhang   if (world2subs) {
3698a53a0a4SSajid Ali     Vec         y;
37097929ea7SJunchao Zhang     PetscInt    n,N=15,xstart,ystart,low,high;
37197929ea7SJunchao Zhang     PetscScalar *yvalue;
37297929ea7SJunchao Zhang 
37397929ea7SJunchao Zhang     /* Initialize x to [0, 1, 2, 3, ..., N-1] */
3749566063dSJacob Faibussowitsch     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
3759566063dSJacob Faibussowitsch     PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
3768a53a0a4SSajid Ali     if (iscuda) {
3779566063dSJacob Faibussowitsch       PetscCall(VecSetType(x, VECCUDA));
3788a53a0a4SSajid Ali     } else {
3799566063dSJacob Faibussowitsch       PetscCall(VecSetType(x, VECSTANDARD));
3808a53a0a4SSajid Ali     }
3819566063dSJacob Faibussowitsch     PetscCall(VecSetUp(x));
3829566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(x,&low,&high));
3839566063dSJacob Faibussowitsch     for (i=low; i<high; i++) PetscCall(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
3849566063dSJacob Faibussowitsch     PetscCall(VecAssemblyBegin(x));
3859566063dSJacob Faibussowitsch     PetscCall(VecAssemblyEnd(x));
38697929ea7SJunchao Zhang 
38797929ea7SJunchao Zhang     /* Every subcomm has a y as long as x */
3889566063dSJacob Faibussowitsch     PetscCall(VecCreate(subcomm, &y));
3899566063dSJacob Faibussowitsch     PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
3908a53a0a4SSajid Ali     if (iscuda) {
3919566063dSJacob Faibussowitsch       PetscCall(VecSetType(y, VECCUDA));
3928a53a0a4SSajid Ali     } else {
3939566063dSJacob Faibussowitsch       PetscCall(VecSetType(y, VECSTANDARD));
3948a53a0a4SSajid Ali     }
3959566063dSJacob Faibussowitsch     PetscCall(VecSetUp(y));
3969566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(y,&n));
39797929ea7SJunchao Zhang 
39897929ea7SJunchao Zhang     /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
39997929ea7SJunchao Zhang        Eeach rank in subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
40097929ea7SJunchao Zhang        necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of rank
40197929ea7SJunchao Zhang        0, 3, 6 etc from PETSC_COMM_WORLD. So subcomm0's pieces are interleaved with pieces from other subcomms in yg.
40297929ea7SJunchao Zhang     */
4038a53a0a4SSajid Ali     if (iscuda) {
4048a53a0a4SSajid Ali       #if defined(PETSC_HAVE_CUDA)
4059566063dSJacob Faibussowitsch         PetscCall(VecCUDAGetArray(y,&yvalue));
4069566063dSJacob Faibussowitsch         PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
4078a53a0a4SSajid Ali       #endif
4088a53a0a4SSajid Ali     } else {
4099566063dSJacob Faibussowitsch       PetscCall(VecGetArray(y,&yvalue));
4109566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg));
4118a53a0a4SSajid Ali     }
4129566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)yg,"yg_on_subcomms")); /* Give a name to view yg clearly */
41397929ea7SJunchao Zhang 
41497929ea7SJunchao Zhang     /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
41597929ea7SJunchao Zhang        since first entry of y on this rank is from x[xstart]. From ystart, we know where ot put entries to yg.
41697929ea7SJunchao Zhang      */
4179566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(y,&xstart,NULL));
4189566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(yg,&ystart,NULL));
41997929ea7SJunchao Zhang 
4209566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix));
4219566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy));
4229566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x,ix,yg,iy,&vscat));
4239566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
4249566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD));
42597929ea7SJunchao Zhang 
42697929ea7SJunchao Zhang     /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in output. */
4279566063dSJacob Faibussowitsch     PetscCall(VecView(yg,PETSC_VIEWER_STDOUT_WORLD));
4289566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&yg));
42997929ea7SJunchao Zhang 
43097929ea7SJunchao Zhang     /* Restory yvalue so that processes in subcomm can use y from now on. */
4318a53a0a4SSajid Ali     if (iscuda) {
4328a53a0a4SSajid Ali       #if defined(PETSC_HAVE_CUDA)
4339566063dSJacob Faibussowitsch         PetscCall(VecCUDARestoreArray(y,&yvalue));
4348a53a0a4SSajid Ali       #endif
4358a53a0a4SSajid Ali     } else {
4369566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(y,&yvalue));
4378a53a0a4SSajid Ali     }
4389566063dSJacob Faibussowitsch     PetscCall(VecScale(y,3.0));
43997929ea7SJunchao Zhang 
4409566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&ix)); /* One can also destroy ix, iy immediately after VecScatterCreate() */
4419566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iy));
4429566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&x));
4439566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&y));
4449566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vscat));
44597929ea7SJunchao Zhang   } /* world2subs */
44697929ea7SJunchao Zhang 
4479566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&subcomm));
4489566063dSJacob Faibussowitsch   PetscCall(PetscFinalize());
449b122ec5aSJacob Faibussowitsch   return 0;
45097929ea7SJunchao Zhang }
45197929ea7SJunchao Zhang 
45297929ea7SJunchao Zhang /*TEST
45397929ea7SJunchao Zhang 
45497929ea7SJunchao Zhang    build:
455dfd57a17SPierre Jolivet      requires: !defined(PETSC_HAVE_MPIUNI)
456c6c723bbSStefano Zampini 
45797929ea7SJunchao Zhang    testset:
45897929ea7SJunchao Zhang      nsize: 7
45997929ea7SJunchao Zhang 
46097929ea7SJunchao Zhang      test:
46197929ea7SJunchao Zhang        suffix: 1
46297929ea7SJunchao Zhang        args: -world2sub
46397929ea7SJunchao Zhang 
46497929ea7SJunchao Zhang      test:
46597929ea7SJunchao Zhang        suffix: 2
46697929ea7SJunchao Zhang        args: -sub2sub
467f424265bSStefano Zampini        # deadlocks with NECMPI and INTELMPI (20210400300)
468f424265bSStefano Zampini        requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)
46997929ea7SJunchao Zhang 
47097929ea7SJunchao Zhang      test:
47197929ea7SJunchao Zhang        suffix: 3
47297929ea7SJunchao Zhang        args: -world2subs
47397929ea7SJunchao Zhang 
47497929ea7SJunchao Zhang      test:
47597929ea7SJunchao Zhang        suffix: 4
4768a53a0a4SSajid Ali        args: -world2sub -vectype cuda
4778a53a0a4SSajid Ali        requires: cuda
4788a53a0a4SSajid Ali 
4798a53a0a4SSajid Ali      test:
4808a53a0a4SSajid Ali        suffix: 5
4818a53a0a4SSajid Ali        args: -sub2sub -vectype cuda
4828a53a0a4SSajid Ali        requires: cuda
4838a53a0a4SSajid Ali 
4848a53a0a4SSajid Ali      test:
4858a53a0a4SSajid Ali       suffix: 6
4868a53a0a4SSajid Ali       args: -world2subs -vectype cuda
4878a53a0a4SSajid Ali       requires: cuda
4888a53a0a4SSajid Ali 
4898a53a0a4SSajid Ali      test:
4908a53a0a4SSajid Ali        suffix: 7
49197929ea7SJunchao Zhang        args: -world2sub -sf_type neighbor
49297929ea7SJunchao Zhang        output_file: output/ex9_1.out
493c6c723bbSStefano Zampini        # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !define(PETSC_HAVE_OMPI_MAJOR_VERSION)
494c6c723bbSStefano Zampini        # segfaults with NECMPI
495c6c723bbSStefano Zampini        requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)
49697929ea7SJunchao Zhang TEST*/
497