static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
  2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
  required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
  To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";

#include <petscvec.h>
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscMPIInt    nproc,grank,mycolor;
  PetscInt       i,n,N = 20,low,high;
  MPI_Comm       subcomm;
  Vec            x  = PETSC_NULL;           /* global vectors on PETSC_COMM_WORLD */
  Vec            yg = PETSC_NULL;           /* global vectors on PETSC_COMM_WORLD */
  VecScatter     vscat;
  IS             ix,iy;
  PetscBool      iscuda = PETSC_FALSE;      /* Option to use VECCUDA vectors */
  PetscBool      optionflag, compareflag;
  char           vectypename[PETSC_MAX_PATH_LEN];
  PetscBool      world2sub  = PETSC_FALSE;  /* Copy a vector from WORLD to a subcomm? */
  PetscBool      sub2sub    = PETSC_FALSE;  /* Copy a vector from a subcomm to another subcomm? */
  PetscBool      world2subs = PETSC_FALSE;  /* Copy a vector from WORLD to multiple subcomms? */

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&nproc);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&grank);CHKERRMPI(ierr);

  if (nproc < 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"This test must have at least two processes to run");

  ierr = PetscOptionsGetBool(NULL,0,"-world2sub",&world2sub,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,0,"-sub2sub",&sub2sub,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,0,"-world2subs",&world2subs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag);CHKERRQ(ierr);
  if (optionflag) {
    ierr = PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag);CHKERRQ(ierr);
    if (compareflag) iscuda = PETSC_TRUE;
  }

  /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
  mycolor = grank % 3;
  ierr = MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm);CHKERRMPI(ierr);
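  /* For instance, with the 7 ranks used by the tests at the bottom of this file, the split yields
     subcomm0 = {0,3,6}, subcomm1 = {1,4} and subcomm2 = {2,5} (numbered by their ranks in PETSC_COMM_WORLD). */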

  /*===========================================================================
   *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
   *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
   *===========================================================================*/
  if (world2sub) {
    ierr = VecCreate(PETSC_COMM_WORLD, &x);CHKERRQ(ierr);
    ierr = VecSetSizes(x, PETSC_DECIDE, N);CHKERRQ(ierr);
    if (iscuda) {
      ierr = VecSetType(x, VECCUDA);CHKERRQ(ierr);
    } else {
      ierr = VecSetType(x, VECSTANDARD);CHKERRQ(ierr);
    }
    ierr = VecSetUp(x);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)x,"x_commworld");CHKERRQ(ierr); /* Give a name to view x clearly */

    /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
    ierr = VecGetOwnershipRange(x,&low,&high);CHKERRQ(ierr);
    for (i=low; i<high; i++) {
      PetscScalar val = -i;
      ierr = VecSetValue(x,i,val,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(x);CHKERRQ(ierr);

    /* Transfer x to a vector y only defined on subcomm0 and vice versa */
    if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
      Vec         y;
      PetscScalar *yvalue;
      ierr = VecCreate(subcomm, &y);CHKERRQ(ierr);
      ierr = VecSetSizes(y, PETSC_DECIDE, N);CHKERRQ(ierr);
      if (iscuda) {
        ierr = VecSetType(y, VECCUDA);CHKERRQ(ierr);
      } else {
        ierr = VecSetType(y, VECSTANDARD);CHKERRQ(ierr);
      }
      ierr = VecSetUp(y);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)y,"y_subcomm_0");CHKERRQ(ierr); /* Give a name to view y clearly */
      ierr = VecGetLocalSize(y,&n);CHKERRQ(ierr);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCUDAGetArray(y,&yvalue);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecGetArray(y,&yvalue);CHKERRQ(ierr);
      }
      /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed to by yvalue.
         Note this is a collective call. All processes have to call it and supply a consistent N.
       */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg);CHKERRQ(ierr);
      }
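      /* Note: VecCreateMPIWithArray()/VecCreateMPICUDAWithArray() do not copy the supplied array;
         yg only borrows yvalue, so that memory must remain valid for as long as yg uses it. */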

      /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
      ierr = VecGetOwnershipRange(yg,&low,&high);CHKERRQ(ierr); /* low, high are global indices */
      ierr = ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix);CHKERRQ(ierr);
      ierr = ISDuplicate(ix,&iy);CHKERRQ(ierr);

      /* Union of ix's on subcomm0 covers the full range of [0,N) */
      ierr = VecScatterCreate(x,ix,yg,iy,&vscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* Once yg has the data from x, we return yvalue to y so that we can use y in other operations.
         VecGetArray must be paired with VecRestoreArray.
       */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCUDARestoreArray(y,&yvalue);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecRestoreArray(y,&yvalue);CHKERRQ(ierr);
      }

      /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
      ierr = VecView(y,PETSC_VIEWER_STDOUT_(subcomm));CHKERRQ(ierr);
      ierr = VecScale(y,2.0);CHKERRQ(ierr);

      /* Send the new y back to x */
      ierr = VecGetArray(y,&yvalue);CHKERRQ(ierr); /* If VecScale was done on the GPU, PETSc will prepare a valid yvalue for access */
      /* Supply the new yvalue to yg without memory copying */
      ierr = VecPlaceArray(yg,yvalue);CHKERRQ(ierr);
      ierr = VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
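      /* Detach the array we placed with VecPlaceArray() so that yg no longer references yvalue */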
      ierr = VecResetArray(yg);CHKERRQ(ierr);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCUDARestoreArray(y,&yvalue);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecRestoreArray(y,&yvalue);CHKERRQ(ierr);
      }

      ierr = VecDestroy(&y);CHKERRQ(ierr);
    } else {
      /* Ranks outside of subcomm0 do not supply values to yg */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg);CHKERRQ(ierr);
      }

      /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
         ranks just need to create empty ISes to cheat VecScatterCreate.
       */
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix);CHKERRQ(ierr);
      ierr = ISDuplicate(ix,&iy);CHKERRQ(ierr);

      ierr = VecScatterCreate(x,ix,yg,iy,&vscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
         But they have to call VecScatterBegin/End since these routines are collective.
       */
      ierr = VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    }

    ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = ISDestroy(&ix);CHKERRQ(ierr);
    ierr = ISDestroy(&iy);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&yg);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr);
  } /* world2sub */

  /*===========================================================================
   *  Transfer a vector x defined on subcomm0 to a vector y defined on
   *  subcomm1. The two subcomms are not overlapping and their union is
   *  not necessarily equal to PETSC_COMM_WORLD.
   *===========================================================================*/
  if (sub2sub) {
    if (mycolor == 0) {
      /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
      PetscInt          n,N = 22;
      Vec               x,xg,yg;
      IS                ix,iy;
      VecScatter        vscat;
      const PetscScalar *xvalue;
      MPI_Comm          intercomm,parentcomm;
      PetscMPIInt       lrank;

      ierr = MPI_Comm_rank(subcomm,&lrank);CHKERRMPI(ierr);

      /* x is on subcomm */
      ierr = VecCreate(subcomm, &x);CHKERRQ(ierr);
      ierr = VecSetSizes(x, PETSC_DECIDE, N);CHKERRQ(ierr);
      if (iscuda) {
        ierr = VecSetType(x, VECCUDA);CHKERRQ(ierr);
      } else {
        ierr = VecSetType(x, VECSTANDARD);CHKERRQ(ierr);
      }
      ierr = VecSetUp(x);CHKERRQ(ierr);
      ierr = VecGetOwnershipRange(x,&low,&high);CHKERRQ(ierr);

      /* Initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
      for (i=low; i<high; i++) {
        PetscScalar val = i;
        ierr = VecSetValue(x,i,val,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(x);CHKERRQ(ierr);

      ierr = MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm);CHKERRMPI(ierr);
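      /* The local leader above is rank 0 of subcomm0; the remote leader, given as rank 1 of the peer
         communicator PETSC_COMM_WORLD, is the world rank of subcomm1's rank 0. */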

      /* Tell rank 0 of subcomm1 the global size of x */
      if (!lrank) {ierr = MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm);CHKERRMPI(ierr);}

      /* Create an intracomm PETSc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
         But this order actually does not matter, since what we care about is vector y, which is defined on subcomm1.
       */
      ierr = MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm);CHKERRMPI(ierr);

      /* Create a vector xg on parentcomm, which shares memory with x */
      ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCUDAGetArrayRead(x,&xvalue);CHKERRQ(ierr);
        ierr = VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecGetArrayRead(x,&xvalue);CHKERRQ(ierr);
        ierr = VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg);CHKERRQ(ierr);
      }

      /* Ranks in subcomm0 have nothing on yg, so they simply have n=0, array=NULL */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg);CHKERRQ(ierr);
      }

      /* Create the vecscatter, which does the identity map by setting yg[i] = xg[i], i=0..N-1. */
      ierr = VecGetOwnershipRange(xg,&low,&high);CHKERRQ(ierr); /* low, high are global indices of xg */
      ierr = ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix);CHKERRQ(ierr);
      ierr = ISDuplicate(ix,&iy);CHKERRQ(ierr);
      ierr = VecScatterCreate(xg,ix,yg,iy,&vscat);CHKERRQ(ierr);

      /* Scatter values from xg to yg */
      ierr = VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCUDARestoreArrayRead(x,&xvalue);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecRestoreArrayRead(x,&xvalue);CHKERRQ(ierr);
      }
      ierr = VecDestroy(&x);CHKERRQ(ierr);
      ierr = ISDestroy(&ix);CHKERRQ(ierr);
      ierr = ISDestroy(&iy);CHKERRQ(ierr);
      ierr = VecDestroy(&xg);CHKERRQ(ierr);
      ierr = VecDestroy(&yg);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr);
      ierr = MPI_Comm_free(&intercomm);CHKERRMPI(ierr);
      ierr = MPI_Comm_free(&parentcomm);CHKERRMPI(ierr);
    } else if (mycolor == 1) { /* subcomm1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
      PetscInt    n,N;
      Vec         y,xg,yg;
      IS          ix,iy;
      VecScatter  vscat;
      PetscScalar *yvalue;
      MPI_Comm    intercomm,parentcomm;
      PetscMPIInt lrank;

      ierr = MPI_Comm_rank(subcomm,&lrank);CHKERRMPI(ierr);
      ierr = MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm);CHKERRMPI(ierr);

      /* The two rank-0 processes are talking */
      if (!lrank) {ierr = MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE);CHKERRMPI(ierr);}
      /* Rank 0 of subcomm1 bcasts N to its members */
      ierr = MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm);CHKERRMPI(ierr);
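      /* Every rank of subcomm1 now knows N. This matters because the creations of xg and yg on parentcomm
         below are collective, and all ranks of parentcomm must supply the same global size N. */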

      /* Create an intracomm PETSc can work on */
      ierr = MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm);CHKERRMPI(ierr);

      /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL. */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg);CHKERRQ(ierr);
      }

      ierr = VecCreate(subcomm, &y);CHKERRQ(ierr);
      ierr = VecSetSizes(y, PETSC_DECIDE, N);CHKERRQ(ierr);
      if (iscuda) {
        ierr = VecSetType(y, VECCUDA);CHKERRQ(ierr);
      } else {
        ierr = VecSetType(y, VECSTANDARD);CHKERRQ(ierr);
      }
      ierr = VecSetUp(y);CHKERRQ(ierr);

      ierr = PetscObjectSetName((PetscObject)y,"y_subcomm_1");CHKERRQ(ierr); /* Give a name to view y clearly */
      ierr = VecGetLocalSize(y,&n);CHKERRQ(ierr);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCUDAGetArray(y,&yvalue);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecGetArray(y,&yvalue);CHKERRQ(ierr);
      }
      /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
         created in the same order in subcomm0/1. For example, we cannot reverse the order of
         creating xg and yg in subcomm1.
       */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg);CHKERRQ(ierr);
      }

      /* Ranks in subcomm0 already specified the full range of the identity map;
         ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
       */
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix);CHKERRQ(ierr);
      ierr = ISDuplicate(ix,&iy);CHKERRQ(ierr);
      ierr = VecScatterCreate(xg,ix,yg,iy,&vscat);CHKERRQ(ierr);

      /* Scatter values from xg to yg */
      ierr = VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        ierr = VecCUDARestoreArray(y,&yvalue);CHKERRQ(ierr);
#endif
      } else {
        ierr = VecRestoreArray(y,&yvalue);CHKERRQ(ierr);
      }

      /* Libraries on subcomm1 can safely use y now, for example, view it */
      ierr = VecView(y,PETSC_VIEWER_STDOUT_(subcomm));CHKERRQ(ierr);

      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = ISDestroy(&ix);CHKERRQ(ierr);
      ierr = ISDestroy(&iy);CHKERRQ(ierr);
      ierr = VecDestroy(&xg);CHKERRQ(ierr);
      ierr = VecDestroy(&yg);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr);
      ierr = MPI_Comm_free(&intercomm);CHKERRMPI(ierr);
      ierr = MPI_Comm_free(&parentcomm);CHKERRMPI(ierr);
    } else if (mycolor == 2) { /* subcomm2 */
      /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
    }
  } /* sub2sub */

  /*===========================================================================
   *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
   *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
   *  as we did in case 1, but that is not efficient. Instead, we use one vecscatter
   *  to achieve that.
   *===========================================================================*/
  if (world2subs) {
    Vec         y;
    PetscInt    n,N=15,xstart,ystart,low,high;
    PetscScalar *yvalue;

    /* Initialize x to [0, 1, 2, 3, ..., N-1] */
    ierr = VecCreate(PETSC_COMM_WORLD, &x);CHKERRQ(ierr);
    ierr = VecSetSizes(x, PETSC_DECIDE, N);CHKERRQ(ierr);
    if (iscuda) {
      ierr = VecSetType(x, VECCUDA);CHKERRQ(ierr);
    } else {
      ierr = VecSetType(x, VECSTANDARD);CHKERRQ(ierr);
    }
    ierr = VecSetUp(x);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(x,&low,&high);CHKERRQ(ierr);
    for (i=low; i<high; i++) {ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr);}
    ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(x);CHKERRQ(ierr);

    /* Every subcomm has a y as long as x */
    ierr = VecCreate(subcomm, &y);CHKERRQ(ierr);
    ierr = VecSetSizes(y, PETSC_DECIDE, N);CHKERRQ(ierr);
    if (iscuda) {
      ierr = VecSetType(y, VECCUDA);CHKERRQ(ierr);
    } else {
      ierr = VecSetType(y, VECSTANDARD);CHKERRQ(ierr);
    }
    ierr = VecSetUp(y);CHKERRQ(ierr);
    ierr = VecGetLocalSize(y,&n);CHKERRQ(ierr);

    /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
       Each rank in the subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
       necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of ranks
       0, 3, 6, etc. from PETSC_COMM_WORLD, so subcomm0's pieces are interleaved with pieces from other subcomms in yg.
     */
    if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
      ierr = VecCUDAGetArray(y,&yvalue);CHKERRQ(ierr);
      ierr = VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg);CHKERRQ(ierr);
#endif
    } else {
      ierr = VecGetArray(y,&yvalue);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg);CHKERRQ(ierr);
    }
    ierr = PetscObjectSetName((PetscObject)yg,"yg_on_subcomms");CHKERRQ(ierr); /* Give a name to view yg clearly */
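    /* For example, with 7 ranks and N=15, yg has global size 45; subcomm0's three pieces of y
       (5 entries each, owned by world ranks 0, 3 and 6) occupy yg's global index ranges [0,5), [21,26)
       and [40,45), interleaved with the pieces contributed by subcomm1 and subcomm2. */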

    /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
       since the first entry of y on this rank is from x[xstart]. From ystart, we know where to put entries into yg.
     */
    ierr = VecGetOwnershipRange(y,&xstart,NULL);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(yg,&ystart,NULL);CHKERRQ(ierr);

    ierr = ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,ix,yg,iy,&vscat);CHKERRQ(ierr);
    ierr = VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in the output. */
    ierr = VecView(yg,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = VecDestroy(&yg);CHKERRQ(ierr);

    /* Restore yvalue so that processes in subcomm can use y from now on. */
    if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
      ierr = VecCUDARestoreArray(y,&yvalue);CHKERRQ(ierr);
#endif
    } else {
      ierr = VecRestoreArray(y,&yvalue);CHKERRQ(ierr);
    }
    ierr = VecScale(y,3.0);CHKERRQ(ierr);

    ierr = ISDestroy(&ix);CHKERRQ(ierr); /* One can also destroy ix, iy immediately after VecScatterCreate() */
    ierr = ISDestroy(&iy);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr);
  } /* world2subs */

  ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr);
  ierr = PetscFinalize();
  return ierr;
}
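
/* A typical manual run of the cases above (exercised automatically by the TEST block below) would look like
     mpiexec -n 7 ./ex9 -world2sub
     mpiexec -n 7 ./ex9 -sub2sub
   and, for a CUDA build, append -vectype cuda to the argument list. */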

/*TEST

   build:
     requires: !defined(PETSC_HAVE_MPIUNI)

   testset:
     nsize: 7

     test:
       suffix: 1
       args: -world2sub

     test:
       suffix: 2
       args: -sub2sub

     test:
       suffix: 3
       args: -world2subs

     test:
       suffix: 4
       args: -world2sub -vectype cuda
       requires: cuda

     test:
       suffix: 5
       args: -sub2sub -vectype cuda
       requires: cuda

     test:
       suffix: 6
       args: -world2subs -vectype cuda
       requires: cuda

     test:
       suffix: 7
       args: -world2sub -sf_type neighbor
       output_file: output/ex9_1.out
       # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc. (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
       requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION)

TEST*/