1*8e89d99cSJDBetteridge 2*8e89d99cSJDBetteridge static char help[] = "Tests `GarbageKeyAllReduceIntersect_Private()` in parallel\n\n"; 3*8e89d99cSJDBetteridge 4*8e89d99cSJDBetteridge #include <petscsys.h> 5*8e89d99cSJDBetteridge #include <petsc/private/garbagecollector.h> 6*8e89d99cSJDBetteridge 7*8e89d99cSJDBetteridge /* This program tests `GarbageKeyAllReduceIntersect_Private()`. 8*8e89d99cSJDBetteridge To test this routine in parallel, the sieve of Eratosthenes is 9*8e89d99cSJDBetteridge implemented. 10*8e89d99cSJDBetteridge */ 11*8e89d99cSJDBetteridge 12*8e89d99cSJDBetteridge /* Populate an array with Prime numbers <= n. 13*8e89d99cSJDBetteridge Primes are generated using trial division 14*8e89d99cSJDBetteridge */ 15*8e89d99cSJDBetteridge PetscErrorCode Prime(PetscInt64 **set, PetscInt n) 16*8e89d99cSJDBetteridge { 17*8e89d99cSJDBetteridge size_t overestimate; 18*8e89d99cSJDBetteridge PetscBool is_prime; 19*8e89d99cSJDBetteridge PetscInt64 ii, jj, count = 0; 20*8e89d99cSJDBetteridge PetscInt64 *prime; 21*8e89d99cSJDBetteridge 22*8e89d99cSJDBetteridge PetscFunctionBeginUser; 23*8e89d99cSJDBetteridge /* There will be fewer than ceil(1.26 * n/log(n)) primes <= n */ 24*8e89d99cSJDBetteridge overestimate = (size_t)PetscCeilReal(((PetscReal)n) * 1.26 / PetscLogReal((PetscReal)n)); 25*8e89d99cSJDBetteridge PetscCall(PetscMalloc1(overestimate, &prime)); 26*8e89d99cSJDBetteridge for (ii = 2; ii < n + 1; ii++) { 27*8e89d99cSJDBetteridge is_prime = PETSC_TRUE; 28*8e89d99cSJDBetteridge for (jj = 2; jj <= PetscFloorReal(PetscSqrtReal(ii)); jj++) { 29*8e89d99cSJDBetteridge if (ii % jj == 0) { 30*8e89d99cSJDBetteridge is_prime = PETSC_FALSE; 31*8e89d99cSJDBetteridge break; 32*8e89d99cSJDBetteridge } 33*8e89d99cSJDBetteridge } 34*8e89d99cSJDBetteridge if (is_prime) { 35*8e89d99cSJDBetteridge prime[count] = ii; 36*8e89d99cSJDBetteridge count++; 37*8e89d99cSJDBetteridge } 38*8e89d99cSJDBetteridge } 39*8e89d99cSJDBetteridge 40*8e89d99cSJDBetteridge PetscCall(PetscMalloc1((size_t)count + 1, set)); 41*8e89d99cSJDBetteridge (*set)[0] = count; 42*8e89d99cSJDBetteridge for (ii = 1; ii < count + 1; ii++) { (*set)[ii] = prime[ii - 1]; } 43*8e89d99cSJDBetteridge PetscCall(PetscFree(prime)); 44*8e89d99cSJDBetteridge PetscFunctionReturn(0); 45*8e89d99cSJDBetteridge } 46*8e89d99cSJDBetteridge 47*8e89d99cSJDBetteridge /* Print out the contents of a set */ 48*8e89d99cSJDBetteridge PetscErrorCode PrintSet(MPI_Comm comm, PetscInt64 *set) 49*8e89d99cSJDBetteridge { 50*8e89d99cSJDBetteridge char text[64]; 51*8e89d99cSJDBetteridge PetscInt ii; 52*8e89d99cSJDBetteridge 53*8e89d99cSJDBetteridge PetscFunctionBeginUser; 54*8e89d99cSJDBetteridge PetscCall(PetscSynchronizedPrintf(comm, "[")); 55*8e89d99cSJDBetteridge for (ii = 1; ii <= (PetscInt)set[0]; ii++) { 56*8e89d99cSJDBetteridge PetscCall(PetscFormatConvert(" %" PetscInt64_FMT ",", text)); 57*8e89d99cSJDBetteridge PetscCall(PetscSynchronizedPrintf(comm, text, set[ii])); 58*8e89d99cSJDBetteridge } 59*8e89d99cSJDBetteridge PetscCall(PetscSynchronizedPrintf(comm, "]\n")); 60*8e89d99cSJDBetteridge PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT)); 61*8e89d99cSJDBetteridge PetscFunctionReturn(0); 62*8e89d99cSJDBetteridge } 63*8e89d99cSJDBetteridge 64*8e89d99cSJDBetteridge /* Check set equality */ 65*8e89d99cSJDBetteridge PetscErrorCode AssertSetsEqual(PetscInt64 *set, PetscInt64 *true_set) 66*8e89d99cSJDBetteridge { 67*8e89d99cSJDBetteridge PetscInt ii; 68*8e89d99cSJDBetteridge 69*8e89d99cSJDBetteridge PetscFunctionBeginUser; 70*8e89d99cSJDBetteridge PetscAssert((set[0] == true_set[0]), PETSC_COMM_WORLD, PETSC_ERR_ARG_INCOMP, "Sets of different sizes"); 71*8e89d99cSJDBetteridge for (ii = 1; ii < set[0] + 1; ii++) { PetscAssert((set[ii] == true_set[ii]), PETSC_COMM_WORLD, PETSC_ERR_ARG_INCOMP, "Sets are different"); } 72*8e89d99cSJDBetteridge PetscFunctionReturn(0); 73*8e89d99cSJDBetteridge } 74*8e89d99cSJDBetteridge 75*8e89d99cSJDBetteridge /* Parallel implementation of the sieve of Eratosthenes */ 76*8e89d99cSJDBetteridge PetscErrorCode test_sieve(MPI_Comm comm) 77*8e89d99cSJDBetteridge { 78*8e89d99cSJDBetteridge PetscInt64 ii, local_p, maximum, n; 79*8e89d99cSJDBetteridge PetscInt64 *local_set, *cursor, *bootstrap_primes, *truth; 80*8e89d99cSJDBetteridge PetscMPIInt size, rank; 81*8e89d99cSJDBetteridge PetscReal x; 82*8e89d99cSJDBetteridge 83*8e89d99cSJDBetteridge PetscFunctionBeginUser; 84*8e89d99cSJDBetteridge PetscCallMPI(MPI_Comm_rank(comm, &rank)); 85*8e89d99cSJDBetteridge PetscCallMPI(MPI_Comm_size(comm, &size)); 86*8e89d99cSJDBetteridge 87*8e89d99cSJDBetteridge /* There should be at least `size + 1` primes smaller than 88*8e89d99cSJDBetteridge (size + 1)*(log(size + 1) + log(log(size + 1)) 89*8e89d99cSJDBetteridge once `size` >=6 90*8e89d99cSJDBetteridge This is sufficient for each rank to create its own sieve based off 91*8e89d99cSJDBetteridge a different prime and calculate the size of the sieve. 92*8e89d99cSJDBetteridge */ 93*8e89d99cSJDBetteridge x = (PetscReal)(size > 6) ? size + 1 : 7; 94*8e89d99cSJDBetteridge x = x * (PetscLogReal(x) + PetscLogReal(PetscLogReal(x))); 95*8e89d99cSJDBetteridge PetscCall(Prime(&bootstrap_primes, PetscCeilReal(x))); 96*8e89d99cSJDBetteridge 97*8e89d99cSJDBetteridge /* Calculate the maximum possible prime, select a prime number for 98*8e89d99cSJDBetteridge each rank and allocate memorty for the sieve 99*8e89d99cSJDBetteridge */ 100*8e89d99cSJDBetteridge maximum = bootstrap_primes[size + 1] * bootstrap_primes[size + 1] - 1; 101*8e89d99cSJDBetteridge local_p = bootstrap_primes[rank + 1]; 102*8e89d99cSJDBetteridge n = maximum - local_p - (maximum / local_p) + 1 + rank + 1; 103*8e89d99cSJDBetteridge PetscCall(PetscMalloc1(n + 1, &local_set)); 104*8e89d99cSJDBetteridge 105*8e89d99cSJDBetteridge /* Populate the sieve first with all primes <= `local_p`, followed by 106*8e89d99cSJDBetteridge all integers that are not a multiple of `local_p` 107*8e89d99cSJDBetteridge */ 108*8e89d99cSJDBetteridge local_set[0] = n; 109*8e89d99cSJDBetteridge cursor = &local_set[1]; 110*8e89d99cSJDBetteridge for (ii = 0; ii < rank + 1; ii++) { 111*8e89d99cSJDBetteridge *cursor = bootstrap_primes[ii + 1]; 112*8e89d99cSJDBetteridge cursor++; 113*8e89d99cSJDBetteridge } 114*8e89d99cSJDBetteridge for (ii = local_p + 1; ii <= maximum; ii++) { 115*8e89d99cSJDBetteridge if (ii % local_p != 0) { 116*8e89d99cSJDBetteridge *cursor = ii; 117*8e89d99cSJDBetteridge cursor++; 118*8e89d99cSJDBetteridge } 119*8e89d99cSJDBetteridge } 120*8e89d99cSJDBetteridge PetscCall(PetscPrintf(comm, "SIEVES:\n")); 121*8e89d99cSJDBetteridge PetscCall(PrintSet(comm, local_set)); 122*8e89d99cSJDBetteridge 123*8e89d99cSJDBetteridge PetscCall(PetscFree(bootstrap_primes)); 124*8e89d99cSJDBetteridge 125*8e89d99cSJDBetteridge /* Perform the intersection, testing parallel intersection routine */ 126*8e89d99cSJDBetteridge PetscCall(GarbageKeyAllReduceIntersect_Private(PETSC_COMM_WORLD, &local_set[1], (PetscInt *)&local_set[0])); 127*8e89d99cSJDBetteridge 128*8e89d99cSJDBetteridge PetscCall(PetscPrintf(comm, "INTERSECTION:\n")); 129*8e89d99cSJDBetteridge PetscCall(PrintSet(comm, local_set)); 130*8e89d99cSJDBetteridge 131*8e89d99cSJDBetteridge PetscCall(Prime(&truth, maximum)); 132*8e89d99cSJDBetteridge PetscCall(PetscPrintf(comm, "TRUTH:\n")); 133*8e89d99cSJDBetteridge PetscCall(PrintSet(comm, truth)); 134*8e89d99cSJDBetteridge 135*8e89d99cSJDBetteridge /* Assert the intersection corresponds to primes calculated using 136*8e89d99cSJDBetteridge trial division 137*8e89d99cSJDBetteridge */ 138*8e89d99cSJDBetteridge PetscCall(AssertSetsEqual(local_set, truth)); 139*8e89d99cSJDBetteridge 140*8e89d99cSJDBetteridge PetscCall(PetscFree(local_set)); 141*8e89d99cSJDBetteridge PetscCall(PetscFree(truth)); 142*8e89d99cSJDBetteridge PetscFunctionReturn(0); 143*8e89d99cSJDBetteridge } 144*8e89d99cSJDBetteridge 145*8e89d99cSJDBetteridge /* Main executes the individual tests in a predefined order */ 146*8e89d99cSJDBetteridge int main(int argc, char **argv) 147*8e89d99cSJDBetteridge { 148*8e89d99cSJDBetteridge PetscFunctionBeginUser; 149*8e89d99cSJDBetteridge PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 150*8e89d99cSJDBetteridge 151*8e89d99cSJDBetteridge PetscCall(test_sieve(PETSC_COMM_WORLD)); 152*8e89d99cSJDBetteridge 153*8e89d99cSJDBetteridge PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ALL PASSED\n")); 154*8e89d99cSJDBetteridge PetscCall(PetscFinalize()); 155*8e89d99cSJDBetteridge return 0; 156*8e89d99cSJDBetteridge } 157*8e89d99cSJDBetteridge 158*8e89d99cSJDBetteridge /*TEST 159*8e89d99cSJDBetteridge 160*8e89d99cSJDBetteridge testset: 161*8e89d99cSJDBetteridge test: 162*8e89d99cSJDBetteridge nsize: 2 163*8e89d99cSJDBetteridge suffix: 2 164*8e89d99cSJDBetteridge output_file: output/ex63_2.out 165*8e89d99cSJDBetteridge test: 166*8e89d99cSJDBetteridge nsize: 3 167*8e89d99cSJDBetteridge suffix: 3 168*8e89d99cSJDBetteridge output_file: output/ex63_3.out 169*8e89d99cSJDBetteridge test: 170*8e89d99cSJDBetteridge nsize: 4 171*8e89d99cSJDBetteridge suffix: 4 172*8e89d99cSJDBetteridge output_file: output/ex63_4.out 173*8e89d99cSJDBetteridge 174*8e89d99cSJDBetteridge TEST*/ 175