static char help[] = "Test VecScatterRemap() on various vecscatters.\n\
We may do optimizations based on index patterns. After index remapping by VecScatterRemap(), we need to\n\
make sure the vecscatter still works as expected with the optimization.\n\
VecScatterRemap() does not support all kinds of vecscatters. In addition, it only supports remapping\n\
entries where we read the data (i.e., todata in a parallel scatter, fromdata in a sequential scatter). This test\n\
tests VecScatterRemap on a parallel to parallel (PtoP) vecscatter, a sequential general to sequential\n\
general (SGToSG) vecscatter and a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter.\n\n";

#include <petscvec.h>

int main(int argc,char **argv)
{
  PetscInt       i,n,*ix,*iy,*tomap,start;
  Vec            x,y;
  PetscMPIInt    nproc,rank;
  IS             isx,isy;
  const PetscInt *ranges;
  VecScatter     vscat;

  PetscFunctionBegin;
  CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help));
  CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&nproc));
  CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank));

  PetscCheckFalse(nproc != 2,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This test must run with exactly two MPI ranks");

  /* ====================================================================
     (1) test VecScatterRemap on a parallel to parallel (PtoP) vecscatter
     ====================================================================
   */

  n = 64; /* long enough to trigger memcpy optimizations in both the local scatter and the remote scatter */

  /* create two MPI vectors x, y of local length n=64, global length N=128 */
  CHKERRQ(VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DECIDE,&x));
  CHKERRQ(VecDuplicate(x,&y));

  /* Initialize x as {0~127} */
  CHKERRQ(VecGetOwnershipRanges(x,&ranges));
  for (i=ranges[rank]; i<ranges[rank+1]; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
  CHKERRQ(VecAssemblyBegin(x));
  CHKERRQ(VecAssemblyEnd(x));

  /* create two general index sets isx = {0~127} and isy = {32~63,64~95,96~127,0~31}. The indices in isx are
     contiguous, but we store them in a general IS and let PETSc detect the pattern and optimize it. The indices
     in isy are chosen so that the vecscatter has both a local scatter and a remote scatter (i.e., MPI communication).
   */
  CHKERRQ(PetscMalloc2(n,&ix,n,&iy));
  start = ranges[rank];
  for (i=ranges[rank]; i<ranges[rank+1]; i++) ix[i-start] = i;
  CHKERRQ(ISCreateGeneral(PETSC_COMM_WORLD,n,ix,PETSC_COPY_VALUES,&isx));

  if (rank == 0) { for (i=0; i<n; i++) iy[i] = i+32; }
  else for (i=0; i<n/2; i++) { iy[i] = i+96; iy[i+n/2] = i; }

  CHKERRQ(ISCreateGeneral(PETSC_COMM_WORLD,n,iy,PETSC_COPY_VALUES,&isy));

  /* create a vecscatter that cyclically shifts x toward the tail by a quarter of its length and puts the result in y */
  CHKERRQ(VecScatterCreate(x,isx,y,isy,&vscat));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {Q3,Q0,Q1,Q2} of x, that is {96~127,0~31,32~63,64~95} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Before VecScatterRemap on PtoP, MPI vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. It changes where we read the
     vector x entries to send out, but does not change the communication pattern (i.e., send/recv pairs and
     message lengths).

     We create tomap as {32~63,0~31}. Originally, we read from indices {0~63} of the local x to send out. The remap
     does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of the local x.
     isy is unchanged, so we will shift x to {Q2,Q1,Q0,Q3}, that is {64~95,32~63,0~31,96~127}.
   */
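  /* For illustration, with n = 64 and tomap = {32~63,0~31}, an original read index of 5 becomes tomap[5] = 37 and
     an original read index of 40 becomes tomap[40] = 8; the two halves of the locally read block are simply swapped
     while the send/recv pairs and message lengths stay exactly as they were. */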
  CHKERRQ(PetscMalloc1(n,&tomap));
  for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }
  CHKERRQ(VecScatterRemap(vscat,tomap,NULL));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {64~95,32~63,0~31,96~127} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on PtoP, MPI vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* destroy everything before we recreate them in different types */
  CHKERRQ(PetscFree2(ix,iy));
  CHKERRQ(VecDestroy(&x));
  CHKERRQ(VecDestroy(&y));
  CHKERRQ(ISDestroy(&isx));
  CHKERRQ(ISDestroy(&isy));
  CHKERRQ(PetscFree(tomap));
  CHKERRQ(VecScatterDestroy(&vscat));

  /* ==========================================================================================
     (2) test VecScatterRemap on a sequential general to sequential general (SGToSG) vecscatter
     ==========================================================================================
   */
  n = 64; /* long enough to trigger memcpy optimizations in the local scatter */

  /* create two seq vectors x, y of length n */
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n,&x));
  CHKERRQ(VecDuplicate(x,&y));

  /* Initialize x as {0~63} */
  for (i=0; i<n; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
  CHKERRQ(VecAssemblyBegin(x));
  CHKERRQ(VecAssemblyEnd(x));

  /* create two general index sets isx = isy = {0~63}. Their indices are contiguous, but we store them as
     general ISes and let PETSc detect the pattern and optimize it */
  CHKERRQ(PetscMalloc2(n,&ix,n,&iy));
  for (i=0; i<n; i++) ix[i] = i;
  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,ix,PETSC_COPY_VALUES,&isx));
  CHKERRQ(ISDuplicate(isx,&isy));

  /* create a vecscatter that just copies x to y */
  CHKERRQ(VecScatterCreate(x,isx,y,isy,&vscat));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {0~63} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSG, SEQ vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter.

     Create tomap as {32~63,0~31}. Originally, we read from indices {0~63} of seq x to write to y. The remap
     does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of seq x.
   */
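  /* In other words, before the remap the scatter is a single contiguous copy x[0~63] -> y[0~63]; after the remap
     it becomes two contiguous block copies, x[32~63] -> y[0~31] and x[0~31] -> y[32~63], so the memcpy-style
     optimization mentioned above is exercised with a read pattern that is no longer one contiguous block. */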
  CHKERRQ(PetscMalloc1(n,&tomap));
  for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }
  CHKERRQ(VecScatterRemap(vscat,tomap,NULL));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {32~63,0~31} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSG, SEQ vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* destroy everything before we recreate them in different types */
  CHKERRQ(PetscFree2(ix,iy));
  CHKERRQ(VecDestroy(&x));
  CHKERRQ(VecDestroy(&y));
  CHKERRQ(ISDestroy(&isx));
  CHKERRQ(ISDestroy(&isy));
  CHKERRQ(PetscFree(tomap));
  CHKERRQ(VecScatterDestroy(&vscat));

  /* ===================================================================================================
     (3) test VecScatterRemap on a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter
     ===================================================================================================
   */
  n = 64; /* long enough to trigger memcpy optimizations in the local scatter */

  /* create two seq vectors: x of length n, and y of length n/2 */
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n,&x));
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n/2,&y));

  /* Initialize x as {0~63} */
  for (i=0; i<n; i++) CHKERRQ(VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES));
  CHKERRQ(VecAssemblyBegin(x));
  CHKERRQ(VecAssemblyEnd(x));

  /* create a general index set isx = {0:63:2}, which actually is a stride IS with first=0, n=32, step=2,
     but we store it as a general IS and let PETSc detect the pattern and optimize it */
  CHKERRQ(PetscMalloc2(n/2,&ix,n/2,&iy));
  for (i=0; i<n/2; i++) ix[i] = i*2;
  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n/2,ix,PETSC_COPY_VALUES,&isx));

  /* create a stride index set isy = {0~31}. We intentionally set the step to 1 to trigger optimizations on it */
  CHKERRQ(ISCreateStride(PETSC_COMM_SELF,32,0,1,&isy));

  /* create a vecscatter that just copies the even-indexed entries of x to y */
  CHKERRQ(VecScatterCreate(x,isx,y,isy,&vscat));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {0:63:2} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter.

     Create tomap as {32~63,0~31}. Originally, we read from indices {0:63:2} of seq x to write to y. The remap
     does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32:63:2,0:31:2} of seq x.
   */
  CHKERRQ(PetscMalloc1(n,&tomap));
  for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }
  CHKERRQ(VecScatterRemap(vscat,tomap,NULL));
  CHKERRQ(VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD));

  /* view y to check the result. y should be {32:63:2,0:31:2} */
  CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n"));
  CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));

  /* destroy everything before PetscFinalize */
  CHKERRQ(PetscFree2(ix,iy));
  CHKERRQ(VecDestroy(&x));
  CHKERRQ(VecDestroy(&y));
  CHKERRQ(ISDestroy(&isx));
  CHKERRQ(ISDestroy(&isy));
  CHKERRQ(PetscFree(tomap));
  CHKERRQ(VecScatterDestroy(&vscat));

  CHKERRQ(PetscFinalize());
  return 0;
}

/*TEST

   test:
      suffix: 1
      nsize: 2
      diff_args: -j
      requires: double

TEST*/