static char help[] = "Test VecScatterRemap() on various vecscatters.\n\
We may do optimizations based on index patterns. After index remapping by VecScatterRemap(), we need to\n\
make sure the vecscatter works as expected with the optimizations.\n\
VecScatterRemap() does not support all kinds of vecscatters. In addition, it only supports remapping\n\
entries where we read the data (i.e., todata in a parallel scatter, fromdata in a sequential scatter). This test\n\
tests VecScatterRemap on a parallel to parallel (PtoP) vecscatter, a sequential general to sequential\n\
general (SGToSG) vecscatter and a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter.\n\n";

#include <petscvec.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       i,n,*ix,*iy,*tomap,start;
  Vec            x,y;
  PetscMPIInt    nproc,rank;
  IS             isx,isy;
  const PetscInt *ranges;
  VecScatter     vscat;

  PetscFunctionBegin;
  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc));
  CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank));

  PetscCheckFalse(nproc != 2,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This test must run with exactly two MPI ranks");

  /* ====================================================================
     (1) test VecScatterRemap on a parallel to parallel (PtoP) vecscatter
     ====================================================================
   */

  n = 64; /* long enough to trigger memcpy optimizations in both the local scatter and the remote scatter */

  /* create two MPI vectors x, y of local length n=64, global length N=128 */
  CHKERRQ(VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DECIDE,&x));
  CHKERRQ(VecDuplicate(x,&y));

  /* Initialize x as {0~127} */
  CHKERRQ(VecGetOwnershipRanges(x,&ranges));
  for (i=ranges[rank]; i<ranges[rank+1]; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
  CHKERRQ(VecAssemblyBegin(x));
  CHKERRQ(VecAssemblyEnd(x));

  /* create two general index sets isx = {0~127} and isy = {32~63,64~95,96~127,0~31}. isx is contiguous, but we use
     it as general and let PETSc detect the pattern and optimize it. The indices in isy are chosen so that the
     vecscatter has both a local scatter and a remote scatter (i.e., MPI communication)
   */
  CHKERRQ(PetscMalloc2(n,&ix,n,&iy));
  start = ranges[rank];
  for (i=ranges[rank]; i<ranges[rank+1]; i++) ix[i-start] = i;
  CHKERRQ(ISCreateGeneral(PETSC_COMM_WORLD,n,ix,PETSC_COPY_VALUES,&isx));

  if (rank == 0) { for (i=0; i<n; i++) iy[i] = i+32; }
  else { for (i=0; i<n/2; i++) { iy[i] = i+96; iy[i+n/2] = i; } }

  CHKERRQ(ISCreateGeneral(PETSC_COMM_WORLD,n,iy,PETSC_COPY_VALUES,&isy));

  /* create a vecscatter that cyclically shifts x toward the tail by a quarter of its length and puts the result in y */
  CHKERRQ(VecScatterCreate(x,isx,y,isy,&vscat));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {Q3,Q0,Q1,Q2} of x, that is {96~127,0~31,32~63,64~95} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Before VecScatterRemap on PtoP, MPI vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* now call VecScatterRemap to slightly change the vecscatter. It changes where we read the vector
     x entries to send out, but does not change the communication pattern (i.e., send/recv pairs and message lengths).

     We create tomap as {32~63,0~31}. Originally, we read from indices {0~63} of the local x to send out. The remap
     does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of the local x.
     isy is unchanged. So, we will shift x to {Q2,Q1,Q0,Q3}, that is {64~95,32~63,0~31,96~127}
   */
  CHKERRQ(PetscMalloc1(n,&tomap));
  for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }
  CHKERRQ(VecScatterRemap(vscat,tomap,NULL));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {64~95,32~63,0~31,96~127} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on PtoP, MPI vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* destroy everything before we recreate the objects with different types */
  CHKERRQ(PetscFree2(ix,iy));
  CHKERRQ(VecDestroy(&x));
  CHKERRQ(VecDestroy(&y));
  CHKERRQ(ISDestroy(&isx));
  CHKERRQ(ISDestroy(&isy));
  CHKERRQ(PetscFree(tomap));
  CHKERRQ(VecScatterDestroy(&vscat));

  /* ==========================================================================================
     (2) test VecScatterRemap on a sequential general to sequential general (SGToSG) vecscatter
     ==========================================================================================
   */
  n = 64; /* long enough to trigger memcpy optimizations in the local scatter */

  /* create two seq vectors x, y of length n */
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n,&x));
  CHKERRQ(VecDuplicate(x,&y));

  /* Initialize x as {0~63} */
  for (i=0; i<n; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
  CHKERRQ(VecAssemblyBegin(x));
  CHKERRQ(VecAssemblyEnd(x));

  /* create two general index sets isx = isy = {0~63}, which are contiguous, but we use them as
     general and let PETSc detect the pattern and optimize it */
  CHKERRQ(PetscMalloc2(n,&ix,n,&iy));
  for (i=0; i<n; i++) ix[i] = i;
  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,ix,PETSC_COPY_VALUES,&isx));
  CHKERRQ(ISDuplicate(isx,&isy));

  /* create a vecscatter that just copies x to y */
  CHKERRQ(VecScatterCreate(x,isx,y,isy,&vscat));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {0~63} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSG, SEQ vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* now call VecScatterRemap to slightly change the vecscatter.

     We create tomap as {32~63,0~31}. Originally, we read from indices {0~63} of the seq x to write to y. The remap
     does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of the seq x.
   */
  CHKERRQ(PetscMalloc1(n,&tomap));
  for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }
  CHKERRQ(VecScatterRemap(vscat,tomap,NULL));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {32~63,0~31} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSG, SEQ vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* destroy everything before we recreate the objects with different types */
  CHKERRQ(PetscFree2(ix,iy));
  CHKERRQ(VecDestroy(&x));
  CHKERRQ(VecDestroy(&y));
  CHKERRQ(ISDestroy(&isx));
  CHKERRQ(ISDestroy(&isy));
  CHKERRQ(PetscFree(tomap));
  CHKERRQ(VecScatterDestroy(&vscat));

  /* ===================================================================================================
     (3) test VecScatterRemap on a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter
     ===================================================================================================
   */
  n = 64; /* long enough to trigger memcpy optimizations in the local scatter */

  /* create two seq vectors: x of length n and y of length n/2 */
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n,&x));
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n/2,&y));

  /* Initialize x as {0~63} */
  for (i=0; i<n; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
  CHKERRQ(VecAssemblyBegin(x));
  CHKERRQ(VecAssemblyEnd(x));

  /* create a general index set isx = {0:63:2}, which actually is a stride IS with first=0, n=32, step=2,
     but we use it as general and let PETSc detect the pattern and optimize it */
  CHKERRQ(PetscMalloc2(n/2,&ix,n/2,&iy));
  for (i=0; i<n/2; i++) ix[i] = i*2;
  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n/2,ix,PETSC_COPY_VALUES,&isx));

  /* create a stride index set isy = {0~31}. We intentionally set the step to 1 to trigger optimizations */
  CHKERRQ(ISCreateStride(PETSC_COMM_SELF,32,0,1,&isy));

  /* create a vecscatter that just copies the even-indexed entries of x to y */
  CHKERRQ(VecScatterCreate(x,isx,y,isy,&vscat));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {0:63:2} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* now call VecScatterRemap to slightly change the vecscatter.

     We create tomap as {32~63,0~31}. Originally, we read from indices {0:63:2} of the seq x to write to y. The remap
     does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32:63:2,0:31:2} of the seq x.
   */
  CHKERRQ(PetscMalloc1(n,&tomap));
  for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }
  CHKERRQ(VecScatterRemap(vscat,tomap,NULL));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {32:63:2,0:31:2} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* destroy everything before PetscFinalize */
  CHKERRQ(PetscFree2(ix,iy));
  CHKERRQ(VecDestroy(&x));
  CHKERRQ(VecDestroy(&y));
  CHKERRQ(ISDestroy(&isx));
  CHKERRQ(ISDestroy(&isy));
  CHKERRQ(PetscFree(tomap));
  CHKERRQ(VecScatterDestroy(&vscat));

  ierr = PetscFinalize();
  return ierr;
}

/*TEST

   test:
      suffix: 1
      nsize: 2
      diff_args: -j
      requires: double
TEST*/