11f480b34SSatish Balay 2c6db04a5SJed Brown #include <petscsys.h> 38563dfccSBarry Smith #include <petsctime.h> 48563dfccSBarry Smith 55a655dc6SBarry Smith extern int BlastCache(void); 65a655dc6SBarry Smith extern int test1(void); 75a655dc6SBarry Smith extern int test2(void); 877c4ece6SBarry Smith 91f480b34SSatish Balay int main(int argc,char **argv) 101f480b34SSatish Balay { 11dfbe8321SBarry Smith PetscErrorCode ierr; 12d3093643SSatish Balay 13a438ae71SBarry Smith ierr = PetscInitialize(&argc,&argv,0,0);if (ierr) return ierr; 14*5f80ce2aSJacob Faibussowitsch CHKERRQ(test1()); 15*5f80ce2aSJacob Faibussowitsch CHKERRQ(test2()); 16f3fe499bSBarry Smith ierr = PetscFinalize(); 1726f47effSBarry Smith return ierr; 1877c4ece6SBarry Smith } 1977c4ece6SBarry Smith 20cf256101SBarry Smith int test1(void) 2177c4ece6SBarry Smith { 22b0a32e0cSBarry Smith PetscLogDouble t1,t2; 2347794344SBarry Smith double value; 242758efb8SSatish Balay int i,ierr,*z,*zi,intval; 25ea709b57SSatish Balay PetscScalar *x,*y; 2677c4ece6SBarry Smith PetscRandom r; 2777c4ece6SBarry Smith 28*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomCreate(PETSC_COMM_SELF,&r)); 29*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomSetFromOptions(r)); 30*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(20000,&x)); 31*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(20000,&y)); 3277c4ece6SBarry Smith 33*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(2000,&z)); 34*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(2000,&zi)); 3577c4ece6SBarry Smith 361f480b34SSatish Balay /* Take care of paging effects */ 37*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 381f480b34SSatish Balay 391f480b34SSatish Balay /* Form the random set of integers */ 4077c4ece6SBarry Smith for (i=0; i<2000; i++) { 41*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomGetValue(r,&value)); 4277c4ece6SBarry Smith intval = (int)(value*20000.0); 43c9a02da4SSatish Balay z[i] = intval; 441f480b34SSatish Balay } 451f480b34SSatish Balay 4677c4ece6SBarry Smith for (i=0; i<2000; i++) { 47*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomGetValue(r,&value)); 4877c4ece6SBarry Smith intval = (int)(value*20000.0); 49ba8edd79SBarry Smith zi[i] = intval; 5077c4ece6SBarry Smith } 51b4d8b9abSSatish Balay /* fprintf(stdout,"Done setup\n"); */ 5277c4ece6SBarry Smith 53*5f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 541f480b34SSatish Balay 55*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 566f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[i]; 57*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 58b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 591f480b34SSatish Balay 60*5f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 611f480b34SSatish Balay 62*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 63608f96ebSSatish Balay for (i=0; i<500; i+=4) { 64608f96ebSSatish Balay x[i] = y[z[i]]; 65608f96ebSSatish Balay x[1+i] = y[z[1+i]]; 66608f96ebSSatish Balay x[2+i] = y[z[2+i]]; 67608f96ebSSatish Balay x[3+i] = y[z[3+i]]; 68608f96ebSSatish Balay } 69*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 70b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0); 71608f96ebSSatish Balay 72*5f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 73608f96ebSSatish Balay 74*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 756f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[z[i]]; 76*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 77b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 7877c4ece6SBarry Smith 79*5f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 801f480b34SSatish Balay 81*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 82608f96ebSSatish Balay for (i=0; i<1000; i+=2) { x[i] = y[z[i]]; x[1+i] = y[z[1+i]]; } 83*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 84b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0); 85608f96ebSSatish Balay 86*5f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 87608f96ebSSatish Balay 88*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 896f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[i]; 90*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 91b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 921f480b34SSatish Balay 93*5f80ce2aSJacob Faibussowitsch CHKERRQ(BlastCache()); 9477c4ece6SBarry Smith 95*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 966f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[zi[i]]; 97*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 98b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 9977c4ece6SBarry Smith 100*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(x,y,10)); 101*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(z,zi,10)); 102*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(z)); 103*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(zi)); 104*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(x)); 105*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(y)); 106*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomDestroy(&r)); 1073a40ed3dSBarry Smith PetscFunctionReturn(0); 10877c4ece6SBarry Smith } 10977c4ece6SBarry Smith 110cf256101SBarry Smith int test2(void) 11177c4ece6SBarry Smith { 112b0a32e0cSBarry Smith PetscLogDouble t1,t2; 11347794344SBarry Smith double value; 114d3093643SSatish Balay int i,ierr,z[20000],zi[20000],intval,tmp; 115ea709b57SSatish Balay PetscScalar x[20000],y[20000]; 11677c4ece6SBarry Smith PetscRandom r; 11777c4ece6SBarry Smith 118*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomCreate(PETSC_COMM_SELF,&r)); 119*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomSetFromOptions(r)); 12077c4ece6SBarry Smith 12177c4ece6SBarry Smith /* Take care of paging effects */ 122*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 12377c4ece6SBarry Smith 12477c4ece6SBarry Smith for (i=0; i<20000; i++) { 12577c4ece6SBarry Smith x[i] = i; 12677c4ece6SBarry Smith y[i] = i; 127d3093643SSatish Balay z[i] = i; 128d3093643SSatish Balay zi[i] = i; 12977c4ece6SBarry Smith } 13077c4ece6SBarry Smith 13177c4ece6SBarry Smith /* Form the random set of integers */ 132d3093643SSatish Balay for (i=0; i<20000; i++) { 133*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomGetValue(r,&value)); 13477c4ece6SBarry Smith intval = (int)(value*20000.0); 13577c4ece6SBarry Smith tmp = z[i]; 13677c4ece6SBarry Smith z[i] = z[intval]; 13777c4ece6SBarry Smith z[intval] = tmp; 13877c4ece6SBarry Smith } 13977c4ece6SBarry Smith 140d3093643SSatish Balay for (i=0; i<20000; i++) { 141*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomGetValue(r,&value)); 14277c4ece6SBarry Smith intval = (int)(value*20000.0); 14377c4ece6SBarry Smith tmp = zi[i]; 14477c4ece6SBarry Smith zi[i] = zi[intval]; 14577c4ece6SBarry Smith zi[intval] = tmp; 14677c4ece6SBarry Smith } 147b4d8b9abSSatish Balay /* fprintf(stdout,"Done setup\n"); */ 14877c4ece6SBarry Smith 149*5f80ce2aSJacob Faibussowitsch /* CHKERRQ(BlastCache()); */ 15077c4ece6SBarry Smith 151*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 1526f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[i] = y[i]; 153*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 154b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 15577c4ece6SBarry Smith 156*5f80ce2aSJacob Faibussowitsch /* CHKERRQ(BlastCache()); */ 15777c4ece6SBarry Smith 158*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 1596f2b61bcSKarl Rupp for (i=0; i<2000; i++) y[i] = x[z[i]]; 160*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 161b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 16277c4ece6SBarry Smith 163*5f80ce2aSJacob Faibussowitsch /* CHKERRQ(BlastCache()); */ 16477c4ece6SBarry Smith 165*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 1666f2b61bcSKarl Rupp for (i=0; i<2000; i++) x[z[i]] = y[i]; 167*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 168b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 16977c4ece6SBarry Smith 170*5f80ce2aSJacob Faibussowitsch /* CHKERRQ(BlastCache()); */ 17177c4ece6SBarry Smith 172*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t1)); 1736f2b61bcSKarl Rupp for (i=0; i<2000; i++) y[z[i]] = x[zi[i]]; 174*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&t2)); 175b4d8b9abSSatish Balay fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 17677c4ece6SBarry Smith 177*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomDestroy(&r)); 1783a40ed3dSBarry Smith PetscFunctionReturn(0); 17977c4ece6SBarry Smith } 18077c4ece6SBarry Smith 181465d0859SSatish Balay int BlastCache(void) 18277c4ece6SBarry Smith { 1839ae0b57aSSatish Balay int i,ierr,n = 1000000; 184ea709b57SSatish Balay PetscScalar *x,*y,*z,*a,*b; 18577c4ece6SBarry Smith 186*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(5*n,&x)); 18777c4ece6SBarry Smith y = x + n; 18877c4ece6SBarry Smith z = y + n; 18977c4ece6SBarry Smith a = z + n; 19077c4ece6SBarry Smith b = a + n; 19177c4ece6SBarry Smith 19277c4ece6SBarry Smith for (i=0; i<n; i++) { 19387828ca2SBarry Smith a[i] = (PetscScalar) i; 19487828ca2SBarry Smith y[i] = (PetscScalar) i; 19587828ca2SBarry Smith z[i] = (PetscScalar) i; 19687828ca2SBarry Smith b[i] = (PetscScalar) i; 19787828ca2SBarry Smith x[i] = (PetscScalar) i; 198ba8edd79SBarry Smith } 199ba8edd79SBarry Smith 2006f2b61bcSKarl Rupp for (i=0; i<n; i++) a[i] = 3.0*x[i] + 2.0*y[i] + 3.3*z[i] - 25.*b[i]; 2016f2b61bcSKarl Rupp for (i=0; i<n; i++) b[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 2026f2b61bcSKarl Rupp for (i=0; i<n; i++) z[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 203*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(x)); 2043a40ed3dSBarry Smith PetscFunctionReturn(0); 2051f480b34SSatish Balay } 206