11f480b34SSatish Balay 2c6db04a5SJed Brown #include <petscsys.h> 38563dfccSBarry Smith #include <petsctime.h> 48563dfccSBarry Smith 51f480b34SSatish Balay 65a655dc6SBarry Smith extern int BlastCache(void); 75a655dc6SBarry Smith extern int test1(void); 85a655dc6SBarry Smith extern int test2(void); 977c4ece6SBarry Smith 104a2ae208SSatish Balay #undef __FUNCT__ 114a2ae208SSatish Balay #define __FUNCT__ "main" 121f480b34SSatish Balay int main(int argc,char **argv) 131f480b34SSatish Balay { 14dfbe8321SBarry Smith PetscErrorCode ierr; 15d3093643SSatish Balay 1677c4ece6SBarry Smith PetscInitialize(&argc,&argv,0,0); 17ac355199SBarry Smith ierr = test1();CHKERRQ(ierr); 18ac355199SBarry Smith ierr = test2();CHKERRQ(ierr); 19f3fe499bSBarry Smith ierr = PetscFinalize(); 20*26f47effSBarry Smith return ierr; 2177c4ece6SBarry Smith } 2277c4ece6SBarry Smith 234a2ae208SSatish Balay #undef __FUNCT__ 244a2ae208SSatish Balay #define __FUNCT__ "test1" 25cf256101SBarry Smith int test1(void) 2677c4ece6SBarry Smith { 27b0a32e0cSBarry Smith PetscLogDouble t1,t2; 2847794344SBarry Smith double value; 292758efb8SSatish Balay int i,ierr,*z,*zi,intval; 30ea709b57SSatish Balay PetscScalar *x,*y; 3177c4ece6SBarry Smith PetscRandom r; 3277c4ece6SBarry Smith 33c77d6671SHong Zhang ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); 34c77d6671SHong Zhang ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); 35785e854fSJed Brown ierr = PetscMalloc1(20000,&x);CHKERRQ(ierr); 36785e854fSJed Brown ierr = PetscMalloc1(20000,&y);CHKERRQ(ierr); 3777c4ece6SBarry Smith 38785e854fSJed Brown ierr = PetscMalloc1(2000,&z);CHKERRQ(ierr); 39785e854fSJed Brown ierr = PetscMalloc1(2000,&zi);CHKERRQ(ierr); 4077c4ece6SBarry Smith 411f480b34SSatish Balay /* Take care of paging effects */ 428563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 431f480b34SSatish Balay 441f480b34SSatish Balay /* Form the random set of integers */ 4577c4ece6SBarry Smith for (i=0; i<2000; i++) { 4677c4ece6SBarry Smith ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 4777c4ece6SBarry Smith intval = (int)(value*20000.0); 48c9a02da4SSatish Balay z[i] = intval; 491f480b34SSatish Balay } 501f480b34SSatish Balay 5177c4ece6SBarry Smith for (i=0; i<2000; i++) { 5277c4ece6SBarry Smith ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 5377c4ece6SBarry Smith intval = (int)(value*20000.0); 54ba8edd79SBarry Smith zi[i] = intval; 5577c4ece6SBarry Smith } 56b4d8b9abSSatish Balay /* fprintf(stdout,"Done setup\n"); */ 5777c4ece6SBarry Smith 58d3093643SSatish Balay ierr = BlastCache();CHKERRQ(ierr); 591f480b34SSatish Balay 608563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 616f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[i]; 628563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 63b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 641f480b34SSatish Balay 65d3093643SSatish Balay ierr = BlastCache();CHKERRQ(ierr); 661f480b34SSatish Balay 678563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 68608f96ebSSatish Balay for (i=0; i<500; i+=4) { 69608f96ebSSatish Balay x[i] = y[z[i]]; 70608f96ebSSatish Balay x[1+i] = y[z[1+i]]; 71608f96ebSSatish Balay x[2+i] = y[z[2+i]]; 72608f96ebSSatish Balay x[3+i] = y[z[3+i]]; 73608f96ebSSatish Balay } 748563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 75b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0); 76608f96ebSSatish Balay 77d3093643SSatish Balay ierr = BlastCache();CHKERRQ(ierr); 78608f96ebSSatish Balay 795bcc183dSPatrick Sanan ierr = PetscTime(&t1);CHKERRQ(ierr); 806f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[z[i]]; 818563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 82b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 8377c4ece6SBarry Smith 84d3093643SSatish Balay ierr = BlastCache();CHKERRQ(ierr); 851f480b34SSatish Balay 868563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 87608f96ebSSatish Balay for (i=0; i<1000; i+=2) { x[i] = y[z[i]]; x[1+i] = y[z[1+i]]; } 888563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 89b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0); 90608f96ebSSatish Balay 91d3093643SSatish Balay ierr = BlastCache();CHKERRQ(ierr); 92608f96ebSSatish Balay 938563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 946f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[i]; 958563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 96b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 971f480b34SSatish Balay 98d3093643SSatish Balay ierr = BlastCache();CHKERRQ(ierr); 9977c4ece6SBarry Smith 1008563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 1016f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[zi[i]]; 1028563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 103b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 10477c4ece6SBarry Smith 105549d3d68SSatish Balay ierr = PetscMemcpy(x,y,10);CHKERRQ(ierr); 106549d3d68SSatish Balay ierr = PetscMemcpy(z,zi,10);CHKERRQ(ierr); 107606d414cSSatish Balay ierr = PetscFree(z);CHKERRQ(ierr); 108606d414cSSatish Balay ierr = PetscFree(zi);CHKERRQ(ierr); 109606d414cSSatish Balay ierr = PetscFree(x);CHKERRQ(ierr); 110606d414cSSatish Balay ierr = PetscFree(y);CHKERRQ(ierr); 11196e147daSBarry Smith ierr = PetscRandomDestroy(&r);CHKERRQ(ierr); 1123a40ed3dSBarry Smith PetscFunctionReturn(0); 11377c4ece6SBarry Smith } 11477c4ece6SBarry Smith 1154a2ae208SSatish Balay #undef __FUNCT__ 1164a2ae208SSatish Balay #define __FUNCT__ "test2" 117cf256101SBarry Smith int test2(void) 11877c4ece6SBarry Smith { 119b0a32e0cSBarry Smith PetscLogDouble t1,t2; 12047794344SBarry Smith double value; 121d3093643SSatish Balay int i,ierr,z[20000],zi[20000],intval,tmp; 122ea709b57SSatish Balay PetscScalar x[20000],y[20000]; 12377c4ece6SBarry Smith PetscRandom r; 12477c4ece6SBarry Smith 125c77d6671SHong Zhang ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); 126c77d6671SHong Zhang ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); 12777c4ece6SBarry Smith 12877c4ece6SBarry Smith /* Take care of paging effects */ 1298563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 13077c4ece6SBarry Smith 13177c4ece6SBarry Smith for (i=0; i<20000; i++) { 13277c4ece6SBarry Smith x[i] = i; 13377c4ece6SBarry Smith y[i] = i; 134d3093643SSatish Balay z[i] = i; 135d3093643SSatish Balay zi[i] = i; 13677c4ece6SBarry Smith } 13777c4ece6SBarry Smith 13877c4ece6SBarry Smith /* Form the random set of integers */ 139d3093643SSatish Balay for (i=0; i<20000; i++) { 14077c4ece6SBarry Smith ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 14177c4ece6SBarry Smith intval = (int)(value*20000.0); 14277c4ece6SBarry Smith tmp = z[i]; 14377c4ece6SBarry Smith z[i] = z[intval]; 14477c4ece6SBarry Smith z[intval] = tmp; 14577c4ece6SBarry Smith } 14677c4ece6SBarry Smith 147d3093643SSatish Balay for (i=0; i<20000; i++) { 14877c4ece6SBarry Smith ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 14977c4ece6SBarry Smith intval = (int)(value*20000.0); 15077c4ece6SBarry Smith tmp = zi[i]; 15177c4ece6SBarry Smith zi[i] = zi[intval]; 15277c4ece6SBarry Smith zi[intval] = tmp; 15377c4ece6SBarry Smith } 154b4d8b9abSSatish Balay /* fprintf(stdout,"Done setup\n"); */ 15577c4ece6SBarry Smith 156d3093643SSatish Balay /* ierr = BlastCache();CHKERRQ(ierr); */ 15777c4ece6SBarry Smith 1588563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 1596f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[i]; 1608563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 161b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 16277c4ece6SBarry Smith 163d3093643SSatish Balay /* ierr = BlastCache();CHKERRQ(ierr); */ 16477c4ece6SBarry Smith 1658563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 1666f2b61bcSKarl Rupp for (i=0; i<2000; i++) y[i] = x[z[i]]; 1678563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 168b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 16977c4ece6SBarry Smith 170d3093643SSatish Balay /* ierr = BlastCache();CHKERRQ(ierr); */ 17177c4ece6SBarry Smith 1728563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 1736f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[i]; 1748563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 175b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 17677c4ece6SBarry Smith 177d3093643SSatish Balay /* ierr = BlastCache();CHKERRQ(ierr); */ 17877c4ece6SBarry Smith 1798563dfccSBarry Smith ierr = PetscTime(&t1);CHKERRQ(ierr); 1806f2b61bcSKarl Rupp for (i=0; i<2000; i++) y[z[i]] = x[zi[i]]; 1818563dfccSBarry Smith ierr = PetscTime(&t2);CHKERRQ(ierr); 182b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 18377c4ece6SBarry Smith 18477c4ece6SBarry Smith 18596e147daSBarry Smith ierr = PetscRandomDestroy(&r);CHKERRQ(ierr); 1863a40ed3dSBarry Smith PetscFunctionReturn(0); 18777c4ece6SBarry Smith } 18877c4ece6SBarry Smith 1894a2ae208SSatish Balay #undef __FUNCT__ 1904a2ae208SSatish Balay #define __FUNCT__ "BlastCache" 191465d0859SSatish Balay int BlastCache(void) 19277c4ece6SBarry Smith { 1939ae0b57aSSatish Balay int i,ierr,n = 1000000; 194ea709b57SSatish Balay PetscScalar *x,*y,*z,*a,*b; 19577c4ece6SBarry Smith 196785e854fSJed Brown ierr = PetscMalloc1(5*n,&x);CHKERRQ(ierr); 19777c4ece6SBarry Smith y = x + n; 19877c4ece6SBarry Smith z = y + n; 19977c4ece6SBarry Smith a = z + n; 20077c4ece6SBarry Smith b = a + n; 20177c4ece6SBarry Smith 20277c4ece6SBarry Smith for (i=0; i<n; i++) { 20387828ca2SBarry Smith a[i] = (PetscScalar) i; 20487828ca2SBarry Smith y[i] = (PetscScalar) i; 20587828ca2SBarry Smith z[i] = (PetscScalar) i; 20687828ca2SBarry Smith b[i] = (PetscScalar) i; 20787828ca2SBarry Smith x[i] = (PetscScalar) i; 208ba8edd79SBarry Smith } 209ba8edd79SBarry Smith 2106f2b61bcSKarl Rupp for (i=0; i<n; i++) a[i] = 3.0*x[i] + 2.0*y[i] + 3.3*z[i] - 25.*b[i]; 2116f2b61bcSKarl Rupp for (i=0; i<n; i++) b[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 2126f2b61bcSKarl Rupp for (i=0; i<n; i++) z[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 213606d414cSSatish Balay ierr = PetscFree(x);CHKERRQ(ierr); 2143a40ed3dSBarry Smith PetscFunctionReturn(0); 2151f480b34SSatish Balay } 216