11f480b34SSatish Balay 2c6db04a5SJed Brown #include <petscsys.h> 38563dfccSBarry Smith #include <petsctime.h> 48563dfccSBarry Smith 55a655dc6SBarry Smith extern int BlastCache(void); 65a655dc6SBarry Smith extern int test1(void); 75a655dc6SBarry Smith extern int test2(void); 877c4ece6SBarry Smith 91f480b34SSatish Balay int main(int argc,char **argv) 101f480b34SSatish Balay { 11d3093643SSatish Balay 12*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscInitialize(&argc,&argv,0,0)); 135f80ce2aSJacob Faibussowitsch CHKERRQ(test1()); 145f80ce2aSJacob Faibussowitsch CHKERRQ(test2()); 15*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscFinalize()); 16*b122ec5aSJacob Faibussowitsch return 0; 1777c4ece6SBarry Smith } 1877c4ece6SBarry Smith 19cf256101SBarry Smith int test1(void) 2077c4ece6SBarry Smith { 21b0a32e0cSBarry Smith PetscLogDouble t1,t2; 2247794344SBarry Smith double value; 232758efb8SSatish Balay int i,ierr,*z,*zi,intval; 24ea709b57SSatish Balay PetscScalar *x,*y; 2577c4ece6SBarry Smith PetscRandom r; 2677c4ece6SBarry Smith 275f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomCreate(PETSC_COMM_SELF,&r)); 285f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomSetFromOptions(r)); 295f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(20000,&x)); 305f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(20000,&y)); 3177c4ece6SBarry Smith 325f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(2000,&z)); 335f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(2000,&zi)); 3477c4ece6SBarry Smith 351f480b34SSatish Balay /* Take care of paging effects */ 365f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 371f480b34SSatish Balay 381f480b34SSatish Balay /* Form the random set of integers */ 3977c4ece6SBarry Smith for (i=0; i<2000; i++) { 405f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomGetValue(r,&value)); 4177c4ece6SBarry Smith intval = (int)(value*20000.0); 42c9a02da4SSatish Balay z[i] = intval; 431f480b34SSatish Balay } 441f480b34SSatish Balay 4577c4ece6SBarry Smith for (i=0; i<2000; i++) { 465f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomGetValue(r,&value)); 4777c4ece6SBarry Smith intval = (int)(value*20000.0); 48ba8edd79SBarry Smith zi[i] = intval; 4977c4ece6SBarry Smith } 50b4d8b9abSSatish Balay /* fprintf(stdout,"Done setup\n"); */ 5177c4ece6SBarry Smith 525f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 531f480b34SSatish Balay 545f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 556f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[i]; 565f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 57b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 581f480b34SSatish Balay 595f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 601f480b34SSatish Balay 615f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 62608f96ebSSatish Balay for (i=0; i<500; i+=4) { 63608f96ebSSatish Balay x[i] = y[z[i]]; 64608f96ebSSatish Balay x[1+i] = y[z[1+i]]; 65608f96ebSSatish Balay x[2+i] = y[z[2+i]]; 66608f96ebSSatish Balay x[3+i] = y[z[3+i]]; 67608f96ebSSatish Balay } 685f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 69b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0); 70608f96ebSSatish Balay 715f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 72608f96ebSSatish Balay 735f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 746f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[z[i]]; 755f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 76b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 7777c4ece6SBarry Smith 785f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 791f480b34SSatish Balay 805f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 81608f96ebSSatish Balay for (i=0; i<1000; i+=2) { x[i] = y[z[i]]; x[1+i] = y[z[1+i]]; } 825f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 83b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0); 84608f96ebSSatish Balay 855f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 86608f96ebSSatish Balay 875f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 886f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[i]; 895f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 90b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 911f480b34SSatish Balay 925f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 9377c4ece6SBarry Smith 945f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 956f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[zi[i]]; 965f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 97b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 9877c4ece6SBarry Smith 995f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(x,y,10)); 1005f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(z,zi,10)); 1015f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(z)); 1025f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(zi)); 1035f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(x)); 1045f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(y)); 1055f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomDestroy(&r)); 1063a40ed3dSBarry Smith PetscFunctionReturn(0); 10777c4ece6SBarry Smith } 10877c4ece6SBarry Smith 109cf256101SBarry Smith int test2(void) 11077c4ece6SBarry Smith { 111b0a32e0cSBarry Smith PetscLogDouble t1,t2; 11247794344SBarry Smith double value; 113d3093643SSatish Balay int i,ierr,z[20000],zi[20000],intval,tmp; 114ea709b57SSatish Balay PetscScalar x[20000],y[20000]; 11577c4ece6SBarry Smith PetscRandom r; 11677c4ece6SBarry Smith 1175f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomCreate(PETSC_COMM_SELF,&r)); 1185f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomSetFromOptions(r)); 11977c4ece6SBarry Smith 12077c4ece6SBarry Smith /* Take care of paging effects */ 1215f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 12277c4ece6SBarry Smith 12377c4ece6SBarry Smith for (i=0; i<20000; i++) { 12477c4ece6SBarry Smith x[i] = i; 12577c4ece6SBarry Smith y[i] = i; 126d3093643SSatish Balay z[i] = i; 127d3093643SSatish Balay zi[i] = i; 12877c4ece6SBarry Smith } 12977c4ece6SBarry Smith 13077c4ece6SBarry Smith /* Form the random set of integers */ 131d3093643SSatish Balay for (i=0; i<20000; i++) { 1325f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomGetValue(r,&value)); 13377c4ece6SBarry Smith intval = (int)(value*20000.0); 13477c4ece6SBarry Smith tmp = z[i]; 13577c4ece6SBarry Smith z[i] = z[intval]; 13677c4ece6SBarry Smith z[intval] = tmp; 13777c4ece6SBarry Smith } 13877c4ece6SBarry Smith 139d3093643SSatish Balay for (i=0; i<20000; i++) { 1405f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomGetValue(r,&value)); 14177c4ece6SBarry Smith intval = (int)(value*20000.0); 14277c4ece6SBarry Smith tmp = zi[i]; 14377c4ece6SBarry Smith zi[i] = zi[intval]; 14477c4ece6SBarry Smith zi[intval] = tmp; 14577c4ece6SBarry Smith } 146b4d8b9abSSatish Balay /* fprintf(stdout,"Done setup\n"); */ 14777c4ece6SBarry Smith 1485f80ce2aSJacob Faibussowitsch /* CHKERRQ(BlastCache()); */ 14977c4ece6SBarry Smith 1505f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 1516f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[i]; 1525f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 153b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 15477c4ece6SBarry Smith 1555f80ce2aSJacob Faibussowitsch /* CHKERRQ(BlastCache()); */ 15677c4ece6SBarry Smith 1575f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 1586f2b61bcSKarl Rupp for (i=0; i<2000; i++) y[i] = x[z[i]]; 1595f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 160b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 16177c4ece6SBarry Smith 1625f80ce2aSJacob Faibussowitsch /* CHKERRQ(BlastCache()); */ 16377c4ece6SBarry Smith 1645f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 1656f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[i]; 1665f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 167b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 16877c4ece6SBarry Smith 1695f80ce2aSJacob Faibussowitsch /* CHKERRQ(BlastCache()); */ 17077c4ece6SBarry Smith 1715f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 1726f2b61bcSKarl Rupp for (i=0; i<2000; i++) y[z[i]] = x[zi[i]]; 1735f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 174b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 17577c4ece6SBarry Smith 1765f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomDestroy(&r)); 1773a40ed3dSBarry Smith PetscFunctionReturn(0); 17877c4ece6SBarry Smith } 17977c4ece6SBarry Smith 180465d0859SSatish Balay int BlastCache(void) 18177c4ece6SBarry Smith { 1829ae0b57aSSatish Balay int i,ierr,n = 1000000; 183ea709b57SSatish Balay PetscScalar *x,*y,*z,*a,*b; 18477c4ece6SBarry Smith 1855f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(5*n,&x)); 18677c4ece6SBarry Smith y = x + n; 18777c4ece6SBarry Smith z = y + n; 18877c4ece6SBarry Smith a = z + n; 18977c4ece6SBarry Smith b = a + n; 19077c4ece6SBarry Smith 19177c4ece6SBarry Smith for (i=0; i<n; i++) { 19287828ca2SBarry Smith a[i] = (PetscScalar) i; 19387828ca2SBarry Smith y[i] = (PetscScalar) i; 19487828ca2SBarry Smith z[i] = (PetscScalar) i; 19587828ca2SBarry Smith b[i] = (PetscScalar) i; 19687828ca2SBarry Smith x[i] = (PetscScalar) i; 197ba8edd79SBarry Smith } 198ba8edd79SBarry Smith 1996f2b61bcSKarl Rupp for (i=0; i<n; i++) a[i] = 3.0*x[i] + 2.0*y[i] + 3.3*z[i] - 25.*b[i]; 2006f2b61bcSKarl Rupp for (i=0; i<n; i++) b[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 2016f2b61bcSKarl Rupp for (i=0; i<n; i++) z[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 2025f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(x)); 2033a40ed3dSBarry Smith PetscFunctionReturn(0); 2041f480b34SSatish Balay } 205