1*97929ea7SJunchao Zhang static char help[]= " Test VecScatterRemap() on various vecscatter. \n\ 2*97929ea7SJunchao Zhang We may do optimization based on index patterns. After index remapping by VecScatterRemap(), we need to \n\ 3*97929ea7SJunchao Zhang make sure the vecscatter works as expected with the optimizaiton. \n\ 4*97929ea7SJunchao Zhang VecScatterRemap() does not support all kinds of vecscatters. In addition, it only supports remapping \n\ 5*97929ea7SJunchao Zhang entries where we read the data (i.e., todata in paralle scatter, fromdata in sequential scatter). This test \n\ 6*97929ea7SJunchao Zhang tests VecScatterRemap on parallel to paralle (PtoP) vecscatter, sequential general to sequential \n\ 7*97929ea7SJunchao Zhang general (SGToSG) vecscatter and sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter.\n\n"; 8*97929ea7SJunchao Zhang 9*97929ea7SJunchao Zhang #include <petscvec.h> 10*97929ea7SJunchao Zhang 11*97929ea7SJunchao Zhang int main(int argc,char **argv) 12*97929ea7SJunchao Zhang { 13*97929ea7SJunchao Zhang PetscErrorCode ierr; 14*97929ea7SJunchao Zhang PetscInt i,n,*ix,*iy,*tomap,start; 15*97929ea7SJunchao Zhang Vec x,y; 16*97929ea7SJunchao Zhang PetscMPIInt nproc,rank; 17*97929ea7SJunchao Zhang IS isx,isy; 18*97929ea7SJunchao Zhang const PetscInt *ranges; 19*97929ea7SJunchao Zhang VecScatter vscat; 20*97929ea7SJunchao Zhang 21*97929ea7SJunchao Zhang PetscFunctionBegin; 22*97929ea7SJunchao Zhang ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 23*97929ea7SJunchao Zhang ierr = MPI_Comm_size(PETSC_COMM_WORLD,&nproc);CHKERRQ(ierr); 24*97929ea7SJunchao Zhang ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 25*97929ea7SJunchao Zhang 26*97929ea7SJunchao Zhang if (nproc != 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This test must run with exactly two MPI ranks\n"); 27*97929ea7SJunchao Zhang 28*97929ea7SJunchao Zhang /* ==================================================================== 29*97929ea7SJunchao Zhang (1) test VecScatterRemap on a parallel to parallel (PtoP) vecscatter 30*97929ea7SJunchao Zhang ==================================================================== 31*97929ea7SJunchao Zhang */ 32*97929ea7SJunchao Zhang 33*97929ea7SJunchao Zhang n = 64; /* long enough to trigger memcpy optimizations both in local scatter and remote scatter */ 34*97929ea7SJunchao Zhang 35*97929ea7SJunchao Zhang /* create two MPI vectors x, y of length n=64, N=128 */ 36*97929ea7SJunchao Zhang ierr = VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DECIDE,&x);CHKERRQ(ierr); 37*97929ea7SJunchao Zhang ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 38*97929ea7SJunchao Zhang 39*97929ea7SJunchao Zhang /* Initialize x as {0~127} */ 40*97929ea7SJunchao Zhang ierr = VecGetOwnershipRanges(x,&ranges);CHKERRQ(ierr); 41*97929ea7SJunchao Zhang for (i=ranges[rank]; i<ranges[rank+1]; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } 42*97929ea7SJunchao Zhang ierr = VecAssemblyBegin(x);CHKERRQ(ierr); 43*97929ea7SJunchao Zhang ierr = VecAssemblyEnd(x);CHKERRQ(ierr); 44*97929ea7SJunchao Zhang 45*97929ea7SJunchao Zhang /* create two general index sets isx = {0~127} and isy = {32~63,64~95,96~127,0~31}. isx is sequential, but we use 46*97929ea7SJunchao Zhang it as general and let PETSc detect the pattern and optimize it. indices in isy are set to make the vecscatter 47*97929ea7SJunchao Zhang have both local scatter and remote scatter (i.e., MPI communication) 48*97929ea7SJunchao Zhang */ 49*97929ea7SJunchao Zhang ierr = PetscMalloc2(n,&ix,n,&iy);CHKERRQ(ierr); 50*97929ea7SJunchao Zhang start = ranges[rank]; 51*97929ea7SJunchao Zhang for (i=ranges[rank]; i<ranges[rank+1]; i++) ix[i-start] = i; 52*97929ea7SJunchao Zhang ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); 53*97929ea7SJunchao Zhang 54*97929ea7SJunchao Zhang if (!rank) { for (i=0; i<n; i++) iy[i] = i+32; } 55*97929ea7SJunchao Zhang else for (i=0; i<n/2; i++) { iy[i] = i+96; iy[i+n/2] = i; } 56*97929ea7SJunchao Zhang 57*97929ea7SJunchao Zhang ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,iy,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr); 58*97929ea7SJunchao Zhang 59*97929ea7SJunchao Zhang /* create a vecscatter that shifts x to the tail by quater periodically and puts the results in y */ 60*97929ea7SJunchao Zhang ierr = VecScatterCreate(x,isx,y,isy,&vscat);CHKERRQ(ierr); 61*97929ea7SJunchao Zhang ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 62*97929ea7SJunchao Zhang ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 63*97929ea7SJunchao Zhang 64*97929ea7SJunchao Zhang /* view y to check the result. y should be {Q3,Q0,Q1,Q2} of x, that is {96~127,0~31,32~63,64~95} */ 65*97929ea7SJunchao Zhang ierr = PetscPrintf(PETSC_COMM_WORLD,"Before VecScatterRemap on PtoP, MPI vector y is:\n");CHKERRQ(ierr); 66*97929ea7SJunchao Zhang ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 67*97929ea7SJunchao Zhang 68*97929ea7SJunchao Zhang /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. It changes where we read vector 69*97929ea7SJunchao Zhang x entries to send out, but does not change the communication pattern (i.e., send/recv pairs and msg lengths). 70*97929ea7SJunchao Zhang 71*97929ea7SJunchao Zhang We create tomap as {32~63,0~31}. Originaly, we read from indices {0~64} of the local x to send out. The remap 72*97929ea7SJunchao Zhang does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of the local x. 73*97929ea7SJunchao Zhang isy is unchanged. So, we will shift x to {Q2,Q1,Q0,Q3}, that is {64~95,32~63,0~31,96~127} 74*97929ea7SJunchao Zhang */ 75*97929ea7SJunchao Zhang ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); 76*97929ea7SJunchao Zhang for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; 77*97929ea7SJunchao Zhang ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); 78*97929ea7SJunchao Zhang ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79*97929ea7SJunchao Zhang ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 80*97929ea7SJunchao Zhang 81*97929ea7SJunchao Zhang /* view y to check the result. y should be {64~95,32~63,0~31,96~127} */ 82*97929ea7SJunchao Zhang ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on PtoP, MPI vector y is:\n");CHKERRQ(ierr); 83*97929ea7SJunchao Zhang ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 84*97929ea7SJunchao Zhang 85*97929ea7SJunchao Zhang /* destroy everything before we recreate them in different types */ 86*97929ea7SJunchao Zhang ierr = PetscFree2(ix,iy);CHKERRQ(ierr); 87*97929ea7SJunchao Zhang ierr = VecDestroy(&x);CHKERRQ(ierr); 88*97929ea7SJunchao Zhang ierr = VecDestroy(&y);CHKERRQ(ierr); 89*97929ea7SJunchao Zhang ierr = ISDestroy(&isx);CHKERRQ(ierr); 90*97929ea7SJunchao Zhang ierr = ISDestroy(&isy);CHKERRQ(ierr); 91*97929ea7SJunchao Zhang ierr = PetscFree(tomap);CHKERRQ(ierr); 92*97929ea7SJunchao Zhang ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); 93*97929ea7SJunchao Zhang 94*97929ea7SJunchao Zhang /* ========================================================================================== 95*97929ea7SJunchao Zhang (2) test VecScatterRemap on a sequential general to sequential general (SGToSG) vecscatter 96*97929ea7SJunchao Zhang ========================================================================================== 97*97929ea7SJunchao Zhang */ 98*97929ea7SJunchao Zhang n = 64; /* long enough to trigger memcpy optimizations in local scatter */ 99*97929ea7SJunchao Zhang 100*97929ea7SJunchao Zhang /* create two seq vectors x, y of length n */ 101*97929ea7SJunchao Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,n,&x);CHKERRQ(ierr); 102*97929ea7SJunchao Zhang ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 103*97929ea7SJunchao Zhang 104*97929ea7SJunchao Zhang /* Initialize x as {0~63} */ 105*97929ea7SJunchao Zhang for (i=0; i<n; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } 106*97929ea7SJunchao Zhang ierr = VecAssemblyBegin(x);CHKERRQ(ierr); 107*97929ea7SJunchao Zhang ierr = VecAssemblyEnd(x);CHKERRQ(ierr); 108*97929ea7SJunchao Zhang 109*97929ea7SJunchao Zhang /* create two general index sets isx = isy = {0~63}, which are sequential, but we use them as 110*97929ea7SJunchao Zhang general and let PETSc detect the pattern and optimize it */ 111*97929ea7SJunchao Zhang ierr = PetscMalloc2(n,&ix,n,&iy);CHKERRQ(ierr); 112*97929ea7SJunchao Zhang for (i=0; i<n; i++) ix[i] = i; 113*97929ea7SJunchao Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); 114*97929ea7SJunchao Zhang ierr = ISDuplicate(isx,&isy);CHKERRQ(ierr); 115*97929ea7SJunchao Zhang 116*97929ea7SJunchao Zhang /* create a vecscatter that just copies x to y */ 117*97929ea7SJunchao Zhang ierr = VecScatterCreate(x,isx,y,isy,&vscat);CHKERRQ(ierr); 118*97929ea7SJunchao Zhang ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 119*97929ea7SJunchao Zhang ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 120*97929ea7SJunchao Zhang 121*97929ea7SJunchao Zhang /* view y to check the result. y should be {0~63} */ 122*97929ea7SJunchao Zhang ierr = PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSG, SEQ vector y is:\n");CHKERRQ(ierr); 123*97929ea7SJunchao Zhang ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 124*97929ea7SJunchao Zhang 125*97929ea7SJunchao Zhang /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. 126*97929ea7SJunchao Zhang 127*97929ea7SJunchao Zhang Create tomap as {32~63,0~31}. Originaly, we read from indices {0~64} of seq x to write to y. The remap 128*97929ea7SJunchao Zhang does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices{32~63,0~31} of seq x. 129*97929ea7SJunchao Zhang */ 130*97929ea7SJunchao Zhang ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); 131*97929ea7SJunchao Zhang for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; 132*97929ea7SJunchao Zhang ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); 133*97929ea7SJunchao Zhang ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 134*97929ea7SJunchao Zhang ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 135*97929ea7SJunchao Zhang 136*97929ea7SJunchao Zhang /* view y to check the result. y should be {32~63,0~31} */ 137*97929ea7SJunchao Zhang ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSG, SEQ vector y is:\n");CHKERRQ(ierr); 138*97929ea7SJunchao Zhang ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 139*97929ea7SJunchao Zhang 140*97929ea7SJunchao Zhang /* destroy everything before we recreate them in different types */ 141*97929ea7SJunchao Zhang ierr = PetscFree2(ix,iy);CHKERRQ(ierr); 142*97929ea7SJunchao Zhang ierr = VecDestroy(&x);CHKERRQ(ierr); 143*97929ea7SJunchao Zhang ierr = VecDestroy(&y);CHKERRQ(ierr); 144*97929ea7SJunchao Zhang ierr = ISDestroy(&isx);CHKERRQ(ierr); 145*97929ea7SJunchao Zhang ierr = ISDestroy(&isy);CHKERRQ(ierr); 146*97929ea7SJunchao Zhang ierr = PetscFree(tomap);CHKERRQ(ierr); 147*97929ea7SJunchao Zhang ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); 148*97929ea7SJunchao Zhang 149*97929ea7SJunchao Zhang /* =================================================================================================== 150*97929ea7SJunchao Zhang (3) test VecScatterRemap on a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter 151*97929ea7SJunchao Zhang =================================================================================================== 152*97929ea7SJunchao Zhang */ 153*97929ea7SJunchao Zhang n = 64; /* long enough to trigger memcpy optimizations in local scatter */ 154*97929ea7SJunchao Zhang 155*97929ea7SJunchao Zhang /* create two seq vectors x of length n, and y of length n/2 */ 156*97929ea7SJunchao Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,n,&x);CHKERRQ(ierr); 157*97929ea7SJunchao Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,n/2,&y);CHKERRQ(ierr); 158*97929ea7SJunchao Zhang 159*97929ea7SJunchao Zhang /* Initialize x as {0~63} */ 160*97929ea7SJunchao Zhang for (i=0; i<n; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } 161*97929ea7SJunchao Zhang ierr = VecAssemblyBegin(x);CHKERRQ(ierr); 162*97929ea7SJunchao Zhang ierr = VecAssemblyEnd(x);CHKERRQ(ierr); 163*97929ea7SJunchao Zhang 164*97929ea7SJunchao Zhang /* create a general index set isx = {0:63:2}, which actually is a stride IS with first=0, n=32, step=2, 165*97929ea7SJunchao Zhang but we use it as general and let PETSc detect the pattern and optimize it. */ 166*97929ea7SJunchao Zhang ierr = PetscMalloc2(n/2,&ix,n/2,&iy);CHKERRQ(ierr); 167*97929ea7SJunchao Zhang for (i=0; i<n/2; i++) ix[i] = i*2; 168*97929ea7SJunchao Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,n/2,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); 169*97929ea7SJunchao Zhang 170*97929ea7SJunchao Zhang /* create a stride1 index set isy = {0~31}. We intentionally set the step to 1 to trigger optimizations */ 171*97929ea7SJunchao Zhang ierr = ISCreateStride(PETSC_COMM_SELF,32,0,1,&isy);CHKERRQ(ierr); 172*97929ea7SJunchao Zhang 173*97929ea7SJunchao Zhang /* create a vecscatter that just copies even entries of x to y */ 174*97929ea7SJunchao Zhang ierr = VecScatterCreate(x,isx,y,isy,&vscat);CHKERRQ(ierr); 175*97929ea7SJunchao Zhang ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 176*97929ea7SJunchao Zhang ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 177*97929ea7SJunchao Zhang 178*97929ea7SJunchao Zhang /* view y to check the result. y should be {0:63:2} */ 179*97929ea7SJunchao Zhang ierr = PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n");CHKERRQ(ierr); 180*97929ea7SJunchao Zhang ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 181*97929ea7SJunchao Zhang 182*97929ea7SJunchao Zhang /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. 183*97929ea7SJunchao Zhang 184*97929ea7SJunchao Zhang Create tomap as {32~63,0~31}. Originaly, we read from indices{0:63:2} of seq x to write to y. The remap 185*97929ea7SJunchao Zhang does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices{32:63:2,0:31:2} of seq x. 186*97929ea7SJunchao Zhang */ 187*97929ea7SJunchao Zhang ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); 188*97929ea7SJunchao Zhang for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; 189*97929ea7SJunchao Zhang ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); 190*97929ea7SJunchao Zhang ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 191*97929ea7SJunchao Zhang ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 192*97929ea7SJunchao Zhang 193*97929ea7SJunchao Zhang /* view y to check the result. y should be {32:63:2,0:31:2} */ 194*97929ea7SJunchao Zhang ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n");CHKERRQ(ierr); 195*97929ea7SJunchao Zhang ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 196*97929ea7SJunchao Zhang 197*97929ea7SJunchao Zhang /* destroy everything before PetscFinalize */ 198*97929ea7SJunchao Zhang ierr = PetscFree2(ix,iy);CHKERRQ(ierr); 199*97929ea7SJunchao Zhang ierr = VecDestroy(&x);CHKERRQ(ierr); 200*97929ea7SJunchao Zhang ierr = VecDestroy(&y);CHKERRQ(ierr); 201*97929ea7SJunchao Zhang ierr = ISDestroy(&isx);CHKERRQ(ierr); 202*97929ea7SJunchao Zhang ierr = ISDestroy(&isy);CHKERRQ(ierr); 203*97929ea7SJunchao Zhang ierr = PetscFree(tomap);CHKERRQ(ierr); 204*97929ea7SJunchao Zhang ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); 205*97929ea7SJunchao Zhang 206*97929ea7SJunchao Zhang ierr = PetscFinalize(); 207*97929ea7SJunchao Zhang return ierr; 208*97929ea7SJunchao Zhang } 209*97929ea7SJunchao Zhang 210*97929ea7SJunchao Zhang /*TEST 211*97929ea7SJunchao Zhang 212*97929ea7SJunchao Zhang test: 213*97929ea7SJunchao Zhang suffix: 1 214*97929ea7SJunchao Zhang nsize: 2 215*97929ea7SJunchao Zhang diff_args: -j 216*97929ea7SJunchao Zhang requires: double 217*97929ea7SJunchao Zhang TEST*/ 218*97929ea7SJunchao Zhang 219