1 /* 2 An adaption of the Stream benchmark for MPI 3 Original code developed by John D. McCalpin 4 */ 5 #include <stdio.h> 6 #include <math.h> 7 #include <limits.h> 8 #include <float.h> 9 #include <petscsys.h> 10 11 #define NTIMESINNER 1 12 //#define N 2*4*20000000 13 #define N 80000000 14 //#define N 1200000 15 //#define N 120000 16 #define NTIMES 50 17 #define OFFSET 0 18 19 static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET]; 20 static double mintime = FLT_MAX; 21 static double bytes = 3 * sizeof(double) * N; 22 23 int main(int argc, char **args) 24 { 25 const double scalar = 3.0; 26 double times[NTIMES], rate; 27 PetscMPIInt rank, size, resultlen; 28 char hostname[MPI_MAX_PROCESSOR_NAME] = {0}; 29 PetscInt n = PETSC_DECIDE, NN; 30 31 PetscCall(PetscInitialize(&argc, &args, NULL, NULL)); 32 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 33 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 34 35 PetscCallMPI(MPI_Get_processor_name(hostname, &resultlen)); 36 (void)resultlen; 37 if (rank) PetscCallMPI(MPI_Send(hostname, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, 0, MPI_COMM_WORLD)); 38 else { 39 // printf("Rank %d host %s\n",0,hostname); 40 for (int j = 1; j < size; ++j) { 41 PetscCallMPI(MPI_Recv(hostname, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, j, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); 42 // printf("Rank %d host %s\n",j,hostname); 43 } 44 } 45 46 NN = N; 47 PetscCall(PetscSplitOwnership(MPI_COMM_WORLD, &n, &NN)); 48 for (int j = 0; j < n; ++j) { 49 a[j] = 1.0; 50 b[j] = 2.0; 51 c[j] = 3.0; 52 } 53 54 /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 55 for (int k = 0; k < NTIMES; ++k) { 56 PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 57 // Do not include barrier in the timed region 58 times[k] = MPI_Wtime(); 59 for (int l = 0; l < NTIMESINNER; l++) { 60 for (register int j = 0; j < n; j++) a[j] = b[j] + scalar * c[j]; 61 if (size == 65) printf("never printed %g\n", a[11]); 62 } 63 // PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 64 times[k] = MPI_Wtime() - times[k]; 65 } 66 // use maximum time over all MPI processes 67 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, times, NTIMES, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD)); 68 for (int k = 1; k < NTIMES; ++k) { /* note -- skip first iteration */ 69 mintime = PetscMin(mintime, times[k]); 70 } 71 rate = 1.0E-06 * bytes * NTIMESINNER / mintime; 72 73 if (rank == 0) { 74 FILE *fd; 75 76 if (size != 1) { 77 double prate; 78 79 fd = fopen("flops", "r"); 80 fscanf(fd, "%lg", &prate); 81 fclose(fd); 82 printf("%d %11.4f Rate (MB/s) %g \n", size, rate, rate / prate); 83 } else { 84 fd = fopen("flops", "w"); 85 fprintf(fd, "%g\n", rate); 86 fclose(fd); 87 printf("%d %11.4f Rate (MB/s) 1\n", size, rate); 88 } 89 } 90 PetscCall(PetscFinalize()); 91 return 0; 92 } 93