19f0612e4SBarry Smith /* 29f0612e4SBarry Smith An adaption of the Stream benchmark for MPI 39f0612e4SBarry Smith Original code developed by John D. McCalpin 49f0612e4SBarry Smith */ 55e71baefSBarry Smith #include <petscsys.h> 6d3ae85c4SBarry Smith 79f0612e4SBarry Smith #define NTIMESINNER 1 8*c6bff371SJunchao Zhang #define N 80000000 // 3*sizeof(double)*N > aggregated last level cache size on a compute node 9d3ae85c4SBarry Smith #define NTIMES 50 10d3ae85c4SBarry Smith #define OFFSET 0 11d3ae85c4SBarry Smith 1251096aa5SJacob Faibussowitsch static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET]; 13*c6bff371SJunchao Zhang static double mintime = 1e9; 149f0612e4SBarry Smith static double bytes = 3 * sizeof(double) * N; 15d3ae85c4SBarry Smith 16d3ae85c4SBarry Smith int main(int argc, char **args) 17d3ae85c4SBarry Smith { 1851096aa5SJacob Faibussowitsch const double scalar = 3.0; 199f0612e4SBarry Smith double times[NTIMES], rate; 20*c6bff371SJunchao Zhang PetscMPIInt rank, size; 219f0612e4SBarry Smith PetscInt n = PETSC_DECIDE, NN; 22d3ae85c4SBarry Smith 23b8abcfdeSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &args, NULL, NULL)); 2451096aa5SJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 2551096aa5SJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 26d3ae85c4SBarry Smith 279f0612e4SBarry Smith NN = N; 289f0612e4SBarry Smith PetscCall(PetscSplitOwnership(MPI_COMM_WORLD, &n, &NN)); 29*c6bff371SJunchao Zhang for (PetscInt j = 0; j < n; ++j) { 30d3ae85c4SBarry Smith a[j] = 1.0; 31d3ae85c4SBarry Smith b[j] = 2.0; 329f0612e4SBarry Smith c[j] = 3.0; 33d3ae85c4SBarry Smith } 34d3ae85c4SBarry Smith 35d3ae85c4SBarry Smith /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 36*c6bff371SJunchao Zhang for (PetscInt k = 0; k < NTIMES; ++k) { 3751096aa5SJacob Faibussowitsch PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 389f0612e4SBarry Smith // Do not include barrier in the timed region 399f0612e4SBarry Smith times[k] = MPI_Wtime(); 40*c6bff371SJunchao Zhang for (PetscInt l = 0; l < NTIMESINNER; l++) { 41*c6bff371SJunchao Zhang for (PetscInt j = 0; j < n; j++) a[j] = b[j] + scalar * c[j]; 42*c6bff371SJunchao Zhang if (size == 2000) PetscCall(PetscPrintf(PETSC_COMM_SELF, "never printed %g\n", a[11])); // to prevent the compiler from optimizing the loop out 43d3ae85c4SBarry Smith } 449f0612e4SBarry Smith // PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 459f0612e4SBarry Smith times[k] = MPI_Wtime() - times[k]; 4651096aa5SJacob Faibussowitsch } 479f0612e4SBarry Smith // use maximum time over all MPI processes 48458b0db5SMartin Diehl PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, times, NTIMES, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD)); 49*c6bff371SJunchao Zhang for (PetscInt k = 1; k < NTIMES; ++k) { /* note -- skip first iteration */ 509f0612e4SBarry Smith mintime = PetscMin(mintime, times[k]); 519f0612e4SBarry Smith } 529f0612e4SBarry Smith rate = 1.0E-06 * bytes * NTIMESINNER / mintime; 53d3ae85c4SBarry Smith 54c5853193SPierre Jolivet if (rank == 0) { 5551096aa5SJacob Faibussowitsch FILE *fd; 5651096aa5SJacob Faibussowitsch 57ed38be93SPierre Jolivet if (size != 1) { 584198fb66SBarry Smith double prate; 5951096aa5SJacob Faibussowitsch 60*c6bff371SJunchao Zhang PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "r", &fd)); 61*c6bff371SJunchao Zhang PetscCheck(fscanf(fd, "%lg", &prate) == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_READ, "Unable to read file"); 62*c6bff371SJunchao Zhang PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 63*c6bff371SJunchao Zhang PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f Rate (MB/s) %6.1f\n", size, rate, rate / prate)); 6451096aa5SJacob Faibussowitsch } else { 65*c6bff371SJunchao Zhang PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "w", &fd)); 66*c6bff371SJunchao Zhang PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g\n", rate)); 67*c6bff371SJunchao Zhang PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 68*c6bff371SJunchao Zhang PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f Rate (MB/s) %6.1f\n", size, rate, 1.0)); 694198fb66SBarry Smith } 70d3ae85c4SBarry Smith } 7151096aa5SJacob Faibussowitsch PetscCall(PetscFinalize()); 72d3ae85c4SBarry Smith return 0; 73d3ae85c4SBarry Smith } 74