197929ea7SJunchao Zhang static char help[]= " Test VecScatterRemap() on various vecscatter. \n\ 297929ea7SJunchao Zhang We may do optimization based on index patterns. After index remapping by VecScatterRemap(), we need to \n\ 397929ea7SJunchao Zhang make sure the vecscatter works as expected with the optimizaiton. \n\ 497929ea7SJunchao Zhang VecScatterRemap() does not support all kinds of vecscatters. In addition, it only supports remapping \n\ 597929ea7SJunchao Zhang entries where we read the data (i.e., todata in paralle scatter, fromdata in sequential scatter). This test \n\ 697929ea7SJunchao Zhang tests VecScatterRemap on parallel to paralle (PtoP) vecscatter, sequential general to sequential \n\ 797929ea7SJunchao Zhang general (SGToSG) vecscatter and sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter.\n\n"; 897929ea7SJunchao Zhang 997929ea7SJunchao Zhang #include <petscvec.h> 1097929ea7SJunchao Zhang 1197929ea7SJunchao Zhang int main(int argc,char **argv) 1297929ea7SJunchao Zhang { 1397929ea7SJunchao Zhang PetscInt i,n,*ix,*iy,*tomap,start; 1497929ea7SJunchao Zhang Vec x,y; 1597929ea7SJunchao Zhang PetscMPIInt nproc,rank; 1697929ea7SJunchao Zhang IS isx,isy; 1797929ea7SJunchao Zhang const PetscInt *ranges; 1897929ea7SJunchao Zhang VecScatter vscat; 1997929ea7SJunchao Zhang 2097929ea7SJunchao Zhang PetscFunctionBegin; 21*9566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc,&argv,(char*)0,help)); 22*9566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc)); 23*9566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 2497929ea7SJunchao Zhang 252c71b3e2SJacob Faibussowitsch PetscCheckFalse(nproc != 2,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This test must run with exactly two MPI ranks"); 2697929ea7SJunchao Zhang 2797929ea7SJunchao Zhang /* ==================================================================== 2897929ea7SJunchao Zhang (1) test VecScatterRemap on a parallel to parallel (PtoP) vecscatter 2997929ea7SJunchao Zhang ==================================================================== 3097929ea7SJunchao Zhang */ 3197929ea7SJunchao Zhang 3297929ea7SJunchao Zhang n = 64; /* long enough to trigger memcpy optimizations both in local scatter and remote scatter */ 3397929ea7SJunchao Zhang 3497929ea7SJunchao Zhang /* create two MPI vectors x, y of length n=64, N=128 */ 35*9566063dSJacob Faibussowitsch PetscCall(VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DECIDE,&x)); 36*9566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x,&y)); 3797929ea7SJunchao Zhang 3897929ea7SJunchao Zhang /* Initialize x as {0~127} */ 39*9566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRanges(x,&ranges)); 40*9566063dSJacob Faibussowitsch for (i=ranges[rank]; i<ranges[rank+1]; i++) PetscCall(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES)); 41*9566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 42*9566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 4397929ea7SJunchao Zhang 4497929ea7SJunchao Zhang /* create two general index sets isx = {0~127} and isy = {32~63,64~95,96~127,0~31}. isx is sequential, but we use 4597929ea7SJunchao Zhang it as general and let PETSc detect the pattern and optimize it. indices in isy are set to make the vecscatter 4697929ea7SJunchao Zhang have both local scatter and remote scatter (i.e., MPI communication) 4797929ea7SJunchao Zhang */ 48*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n,&ix,n,&iy)); 4997929ea7SJunchao Zhang start = ranges[rank]; 5097929ea7SJunchao Zhang for (i=ranges[rank]; i<ranges[rank+1]; i++) ix[i-start] = i; 51*9566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_WORLD,n,ix,PETSC_COPY_VALUES,&isx)); 5297929ea7SJunchao Zhang 53dd400576SPatrick Sanan if (rank == 0) { for (i=0; i<n; i++) iy[i] = i+32; } 5497929ea7SJunchao Zhang else for (i=0; i<n/2; i++) { iy[i] = i+96; iy[i+n/2] = i; } 5597929ea7SJunchao Zhang 56*9566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_WORLD,n,iy,PETSC_COPY_VALUES,&isy)); 5797929ea7SJunchao Zhang 5897929ea7SJunchao Zhang /* create a vecscatter that shifts x to the tail by quater periodically and puts the results in y */ 59*9566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,isx,y,isy,&vscat)); 60*9566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 61*9566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 6297929ea7SJunchao Zhang 6397929ea7SJunchao Zhang /* view y to check the result. y should be {Q3,Q0,Q1,Q2} of x, that is {96~127,0~31,32~63,64~95} */ 64*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Before VecScatterRemap on PtoP, MPI vector y is:\n")); 65*9566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 6697929ea7SJunchao Zhang 6797929ea7SJunchao Zhang /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. It changes where we read vector 6897929ea7SJunchao Zhang x entries to send out, but does not change the communication pattern (i.e., send/recv pairs and msg lengths). 6997929ea7SJunchao Zhang 7097929ea7SJunchao Zhang We create tomap as {32~63,0~31}. Originaly, we read from indices {0~64} of the local x to send out. The remap 7197929ea7SJunchao Zhang does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of the local x. 7297929ea7SJunchao Zhang isy is unchanged. So, we will shift x to {Q2,Q1,Q0,Q3}, that is {64~95,32~63,0~31,96~127} 7397929ea7SJunchao Zhang */ 74*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&tomap)); 7597929ea7SJunchao Zhang for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; 76*9566063dSJacob Faibussowitsch PetscCall(VecScatterRemap(vscat,tomap,NULL)); 77*9566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 78*9566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 7997929ea7SJunchao Zhang 8097929ea7SJunchao Zhang /* view y to check the result. y should be {64~95,32~63,0~31,96~127} */ 81*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on PtoP, MPI vector y is:\n")); 82*9566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 8397929ea7SJunchao Zhang 8497929ea7SJunchao Zhang /* destroy everything before we recreate them in different types */ 85*9566063dSJacob Faibussowitsch PetscCall(PetscFree2(ix,iy)); 86*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 87*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 88*9566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isx)); 89*9566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isy)); 90*9566063dSJacob Faibussowitsch PetscCall(PetscFree(tomap)); 91*9566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 9297929ea7SJunchao Zhang 9397929ea7SJunchao Zhang /* ========================================================================================== 9497929ea7SJunchao Zhang (2) test VecScatterRemap on a sequential general to sequential general (SGToSG) vecscatter 9597929ea7SJunchao Zhang ========================================================================================== 9697929ea7SJunchao Zhang */ 9797929ea7SJunchao Zhang n = 64; /* long enough to trigger memcpy optimizations in local scatter */ 9897929ea7SJunchao Zhang 9997929ea7SJunchao Zhang /* create two seq vectors x, y of length n */ 100*9566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,n,&x)); 101*9566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x,&y)); 10297929ea7SJunchao Zhang 10397929ea7SJunchao Zhang /* Initialize x as {0~63} */ 104*9566063dSJacob Faibussowitsch for (i=0; i<n; i++) PetscCall(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES)); 105*9566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 106*9566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 10797929ea7SJunchao Zhang 10897929ea7SJunchao Zhang /* create two general index sets isx = isy = {0~63}, which are sequential, but we use them as 10997929ea7SJunchao Zhang general and let PETSc detect the pattern and optimize it */ 110*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n,&ix,n,&iy)); 11197929ea7SJunchao Zhang for (i=0; i<n; i++) ix[i] = i; 112*9566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,ix,PETSC_COPY_VALUES,&isx)); 113*9566063dSJacob Faibussowitsch PetscCall(ISDuplicate(isx,&isy)); 11497929ea7SJunchao Zhang 11597929ea7SJunchao Zhang /* create a vecscatter that just copies x to y */ 116*9566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,isx,y,isy,&vscat)); 117*9566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 118*9566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 11997929ea7SJunchao Zhang 12097929ea7SJunchao Zhang /* view y to check the result. y should be {0~63} */ 121*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSG, SEQ vector y is:\n")); 122*9566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 12397929ea7SJunchao Zhang 12497929ea7SJunchao Zhang /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. 12597929ea7SJunchao Zhang 12697929ea7SJunchao Zhang Create tomap as {32~63,0~31}. Originaly, we read from indices {0~64} of seq x to write to y. The remap 12797929ea7SJunchao Zhang does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices{32~63,0~31} of seq x. 12897929ea7SJunchao Zhang */ 129*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&tomap)); 13097929ea7SJunchao Zhang for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; 131*9566063dSJacob Faibussowitsch PetscCall(VecScatterRemap(vscat,tomap,NULL)); 132*9566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 133*9566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 13497929ea7SJunchao Zhang 13597929ea7SJunchao Zhang /* view y to check the result. y should be {32~63,0~31} */ 136*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSG, SEQ vector y is:\n")); 137*9566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 13897929ea7SJunchao Zhang 13997929ea7SJunchao Zhang /* destroy everything before we recreate them in different types */ 140*9566063dSJacob Faibussowitsch PetscCall(PetscFree2(ix,iy)); 141*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 142*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 143*9566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isx)); 144*9566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isy)); 145*9566063dSJacob Faibussowitsch PetscCall(PetscFree(tomap)); 146*9566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 14797929ea7SJunchao Zhang 14897929ea7SJunchao Zhang /* =================================================================================================== 14997929ea7SJunchao Zhang (3) test VecScatterRemap on a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter 15097929ea7SJunchao Zhang =================================================================================================== 15197929ea7SJunchao Zhang */ 15297929ea7SJunchao Zhang n = 64; /* long enough to trigger memcpy optimizations in local scatter */ 15397929ea7SJunchao Zhang 15497929ea7SJunchao Zhang /* create two seq vectors x of length n, and y of length n/2 */ 155*9566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,n,&x)); 156*9566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,n/2,&y)); 15797929ea7SJunchao Zhang 15897929ea7SJunchao Zhang /* Initialize x as {0~63} */ 159*9566063dSJacob Faibussowitsch for (i=0; i<n; i++) PetscCall(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES)); 160*9566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 161*9566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 16297929ea7SJunchao Zhang 16397929ea7SJunchao Zhang /* create a general index set isx = {0:63:2}, which actually is a stride IS with first=0, n=32, step=2, 16497929ea7SJunchao Zhang but we use it as general and let PETSc detect the pattern and optimize it. */ 165*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n/2,&ix,n/2,&iy)); 16697929ea7SJunchao Zhang for (i=0; i<n/2; i++) ix[i] = i*2; 167*9566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n/2,ix,PETSC_COPY_VALUES,&isx)); 16897929ea7SJunchao Zhang 16997929ea7SJunchao Zhang /* create a stride1 index set isy = {0~31}. We intentionally set the step to 1 to trigger optimizations */ 170*9566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,32,0,1,&isy)); 17197929ea7SJunchao Zhang 17297929ea7SJunchao Zhang /* create a vecscatter that just copies even entries of x to y */ 173*9566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,isx,y,isy,&vscat)); 174*9566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 175*9566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 17697929ea7SJunchao Zhang 17797929ea7SJunchao Zhang /* view y to check the result. y should be {0:63:2} */ 178*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n")); 179*9566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 18097929ea7SJunchao Zhang 18197929ea7SJunchao Zhang /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. 18297929ea7SJunchao Zhang 18397929ea7SJunchao Zhang Create tomap as {32~63,0~31}. Originaly, we read from indices{0:63:2} of seq x to write to y. The remap 18497929ea7SJunchao Zhang does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices{32:63:2,0:31:2} of seq x. 18597929ea7SJunchao Zhang */ 186*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&tomap)); 18797929ea7SJunchao Zhang for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; 188*9566063dSJacob Faibussowitsch PetscCall(VecScatterRemap(vscat,tomap,NULL)); 189*9566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 190*9566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 19197929ea7SJunchao Zhang 19297929ea7SJunchao Zhang /* view y to check the result. y should be {32:63:2,0:31:2} */ 193*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n")); 194*9566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 19597929ea7SJunchao Zhang 19697929ea7SJunchao Zhang /* destroy everything before PetscFinalize */ 197*9566063dSJacob Faibussowitsch PetscCall(PetscFree2(ix,iy)); 198*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 199*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 200*9566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isx)); 201*9566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isy)); 202*9566063dSJacob Faibussowitsch PetscCall(PetscFree(tomap)); 203*9566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 20497929ea7SJunchao Zhang 205*9566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 206b122ec5aSJacob Faibussowitsch return 0; 20797929ea7SJunchao Zhang } 20897929ea7SJunchao Zhang 20997929ea7SJunchao Zhang /*TEST 21097929ea7SJunchao Zhang 21197929ea7SJunchao Zhang test: 21297929ea7SJunchao Zhang suffix: 1 21397929ea7SJunchao Zhang nsize: 2 21497929ea7SJunchao Zhang diff_args: -j 21597929ea7SJunchao Zhang requires: double 21697929ea7SJunchao Zhang TEST*/ 217