1*ce605777SToby Isaac 2*ce605777SToby Isaac #include <petsc/private/petscimpl.h> 3*ce605777SToby Isaac #include <petscis.h> 4*ce605777SToby Isaac 5*ce605777SToby Isaac /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 6*ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 7*ce605777SToby Isaac { 8*ce605777SToby Isaac PetscInt diff; 9*ce605777SToby Isaac PetscInt split, mid, partner; 10*ce605777SToby Isaac PetscErrorCode ierr; 11*ce605777SToby Isaac 12*ce605777SToby Isaac PetscFunctionBegin; 13*ce605777SToby Isaac diff = rankEnd - rankStart; 14*ce605777SToby Isaac if (diff <= 0) PetscFunctionReturn(0); 15*ce605777SToby Isaac if (diff == 1) { 16*ce605777SToby Isaac if (forward) { 17*ce605777SToby Isaac ierr = PetscSortInt((PetscInt) n, keys);CHKERRQ(ierr); 18*ce605777SToby Isaac } else { 19*ce605777SToby Isaac ierr = PetscSortReverseInt((PetscInt) n, keys);CHKERRQ(ierr); 20*ce605777SToby Isaac } 21*ce605777SToby Isaac PetscFunctionReturn(0); 22*ce605777SToby Isaac } 23*ce605777SToby Isaac split = 1; 24*ce605777SToby Isaac while (2 * split < diff) split *= 2; 25*ce605777SToby Isaac mid = rankStart + split; 26*ce605777SToby Isaac if (rank < mid) { 27*ce605777SToby Isaac partner = rank + split; 28*ce605777SToby Isaac } else { 29*ce605777SToby Isaac partner = rank - split; 30*ce605777SToby Isaac } 31*ce605777SToby Isaac if (partner < rankEnd) { 32*ce605777SToby Isaac PetscMPIInt i; 33*ce605777SToby Isaac 34*ce605777SToby Isaac ierr = MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);CHKERRQ(ierr); 35*ce605777SToby Isaac if ((rank < partner) == (forward == PETSC_TRUE)) { 36*ce605777SToby Isaac for (i = 0; i < n; i++) { 37*ce605777SToby Isaac keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i]; 38*ce605777SToby Isaac } 39*ce605777SToby Isaac } else { 40*ce605777SToby Isaac for (i = 0; i < n; i++) { 41*ce605777SToby Isaac keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i]; 42*ce605777SToby Isaac } 43*ce605777SToby Isaac } 44*ce605777SToby Isaac } 45*ce605777SToby Isaac /* divide and conquer */ 46*ce605777SToby Isaac if (rank < mid) { 47*ce605777SToby Isaac ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);CHKERRQ(ierr); 48*ce605777SToby Isaac } else { 49*ce605777SToby Isaac ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr); 50*ce605777SToby Isaac } 51*ce605777SToby Isaac PetscFunctionReturn(0); 52*ce605777SToby Isaac } 53*ce605777SToby Isaac 54*ce605777SToby Isaac /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 55*ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 56*ce605777SToby Isaac { 57*ce605777SToby Isaac PetscInt diff; 58*ce605777SToby Isaac PetscInt mid; 59*ce605777SToby Isaac PetscErrorCode ierr; 60*ce605777SToby Isaac 61*ce605777SToby Isaac PetscFunctionBegin; 62*ce605777SToby Isaac diff = rankEnd - rankStart; 63*ce605777SToby Isaac if (diff <= 0) PetscFunctionReturn(0); 64*ce605777SToby Isaac if (diff == 1) { 65*ce605777SToby Isaac if (forward) { 66*ce605777SToby Isaac ierr = PetscSortInt(n, keys);CHKERRQ(ierr); 67*ce605777SToby Isaac } else { 68*ce605777SToby Isaac ierr = PetscSortReverseInt(n, keys);CHKERRQ(ierr); 69*ce605777SToby Isaac } 70*ce605777SToby Isaac PetscFunctionReturn(0); 71*ce605777SToby Isaac } 72*ce605777SToby Isaac mid = rankStart + diff / 2; 73*ce605777SToby Isaac /* divide and conquer */ 74*ce605777SToby Isaac if (rank < mid) { 75*ce605777SToby Isaac ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, !forward);CHKERRQ(ierr); 76*ce605777SToby Isaac } else { 77*ce605777SToby Isaac ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr); 78*ce605777SToby Isaac } 79*ce605777SToby Isaac /* bitonic merge */ 80*ce605777SToby Isaac ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr); 81*ce605777SToby Isaac PetscFunctionReturn(0); 82*ce605777SToby Isaac } 83*ce605777SToby Isaac 84*ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[]) 85*ce605777SToby Isaac { 86*ce605777SToby Isaac PetscMPIInt size, rank, tag, mpin; 87*ce605777SToby Isaac PetscInt *buffer; 88*ce605777SToby Isaac PetscErrorCode ierr; 89*ce605777SToby Isaac 90*ce605777SToby Isaac PetscFunctionBegin; 91*ce605777SToby Isaac PetscValidIntPointer(keys, 3); 92*ce605777SToby Isaac ierr = PetscCommGetNewTag(comm, &tag);CHKERRQ(ierr); 93*ce605777SToby Isaac ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 94*ce605777SToby Isaac ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 95*ce605777SToby Isaac ierr = PetscMPIIntCast(n, &mpin);CHKERRQ(ierr); 96*ce605777SToby Isaac ierr = PetscMalloc1(n, &buffer);CHKERRQ(ierr); 97*ce605777SToby Isaac ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);CHKERRQ(ierr); 98*ce605777SToby Isaac ierr = PetscFree(buffer);CHKERRQ(ierr); 99*ce605777SToby Isaac PetscFunctionReturn(0); 100*ce605777SToby Isaac } 101*ce605777SToby Isaac 102*ce605777SToby Isaac static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[]) 103*ce605777SToby Isaac { 104*ce605777SToby Isaac PetscMPIInt size, rank; 105*ce605777SToby Isaac PetscInt *pivots, *finalpivots, i; 106*ce605777SToby Isaac PetscInt non_empty, my_first, count; 107*ce605777SToby Isaac PetscMPIInt *keys_per, max_keys_per; 108*ce605777SToby Isaac PetscErrorCode ierr; 109*ce605777SToby Isaac 110*ce605777SToby Isaac PetscFunctionBegin; 111*ce605777SToby Isaac ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr); 112*ce605777SToby Isaac ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRQ(ierr); 113*ce605777SToby Isaac 114*ce605777SToby Isaac /* Choose P - 1 pivots that would be ideal for the distribution on this process */ 115*ce605777SToby Isaac ierr = PetscMalloc1(size - 1, &pivots);CHKERRQ(ierr); 116*ce605777SToby Isaac for (i = 0; i < size - 1; i++) { 117*ce605777SToby Isaac PetscInt index; 118*ce605777SToby Isaac 119*ce605777SToby Isaac if (!mapin->n) { 120*ce605777SToby Isaac /* if this rank is empty, put "infinity" in the list. each process knows how many empty 121*ce605777SToby Isaac * processes are in the layout, so those values will be ignored from the end of the sorted 122*ce605777SToby Isaac * pivots */ 123*ce605777SToby Isaac pivots[i] = PETSC_MAX_INT; 124*ce605777SToby Isaac } else { 125*ce605777SToby Isaac /* match the distribution to the desired output described by mapout by getting the index 126*ce605777SToby Isaac * that is approximately the appropriate fraction through the list */ 127*ce605777SToby Isaac index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N); 128*ce605777SToby Isaac index = PetscMin(index, (mapin->n - 1)); 129*ce605777SToby Isaac index = PetscMax(index, 0); 130*ce605777SToby Isaac pivots[i] = keysin[index]; 131*ce605777SToby Isaac } 132*ce605777SToby Isaac } 133*ce605777SToby Isaac /* sort the pivots in parallel */ 134*ce605777SToby Isaac ierr = PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);CHKERRQ(ierr); 135*ce605777SToby Isaac #if defined(PETSC_USE_DEBUG) 136*ce605777SToby Isaac { 137*ce605777SToby Isaac PetscBool sorted; 138*ce605777SToby Isaac 139*ce605777SToby Isaac ierr = PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);CHKERRQ(ierr); 140*ce605777SToby Isaac if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");CHKERRQ(ierr); 141*ce605777SToby Isaac } 142*ce605777SToby Isaac #endif 143*ce605777SToby Isaac 144*ce605777SToby Isaac /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select 145*ce605777SToby Isaac * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */ 146*ce605777SToby Isaac non_empty = size; 147*ce605777SToby Isaac for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i+1]) non_empty--; 148*ce605777SToby Isaac ierr = PetscCalloc1(size + 1, &keys_per);CHKERRQ(ierr); 149*ce605777SToby Isaac my_first = -1; 150*ce605777SToby Isaac if (non_empty) { 151*ce605777SToby Isaac for (i = 0; i < size - 1; i++) { 152*ce605777SToby Isaac size_t sample = (i + 1) * non_empty - 1; 153*ce605777SToby Isaac size_t sample_rank = sample / (size - 1); 154*ce605777SToby Isaac 155*ce605777SToby Isaac keys_per[sample_rank]++; 156*ce605777SToby Isaac if (my_first < 0 && sample_rank == rank) { 157*ce605777SToby Isaac my_first = (PetscInt) (sample - sample_rank * (size - 1)); 158*ce605777SToby Isaac } 159*ce605777SToby Isaac } 160*ce605777SToby Isaac } 161*ce605777SToby Isaac for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per); 162*ce605777SToby Isaac ierr = PetscMalloc1(size * max_keys_per, &finalpivots);CHKERRQ(ierr); 163*ce605777SToby Isaac /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array 164*ce605777SToby Isaac * and allgather them */ 165*ce605777SToby Isaac for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty]; 166*ce605777SToby Isaac for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT; 167*ce605777SToby Isaac ierr = MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm); 168*ce605777SToby Isaac for (i = 0, count = 0; i < size; i++) { 169*ce605777SToby Isaac PetscInt j; 170*ce605777SToby Isaac 171*ce605777SToby Isaac for (j = 0; j < max_keys_per; j++) { 172*ce605777SToby Isaac if (j < keys_per[i]) { 173*ce605777SToby Isaac finalpivots[count++] = finalpivots[i * max_keys_per + j]; 174*ce605777SToby Isaac } 175*ce605777SToby Isaac } 176*ce605777SToby Isaac } 177*ce605777SToby Isaac *outpivots = finalpivots; 178*ce605777SToby Isaac ierr = PetscFree(keys_per);CHKERRQ(ierr); 179*ce605777SToby Isaac ierr = PetscFree(pivots);CHKERRQ(ierr); 180*ce605777SToby Isaac PetscFunctionReturn(0); 181*ce605777SToby Isaac } 182*ce605777SToby Isaac 183*ce605777SToby Isaac static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[]) 184*ce605777SToby Isaac { 185*ce605777SToby Isaac PetscMPIInt size, rank; 186*ce605777SToby Isaac PetscInt myOffset, nextOffset; 187*ce605777SToby Isaac PetscInt i; 188*ce605777SToby Isaac PetscMPIInt total, filled; 189*ce605777SToby Isaac PetscMPIInt sender, nfirst, nsecond; 190*ce605777SToby Isaac PetscMPIInt firsttag, secondtag; 191*ce605777SToby Isaac MPI_Request firstreqrcv; 192*ce605777SToby Isaac MPI_Request *firstreqs; 193*ce605777SToby Isaac MPI_Request *secondreqs; 194*ce605777SToby Isaac MPI_Status firststatus; 195*ce605777SToby Isaac 196*ce605777SToby Isaac PetscErrorCode ierr; 197*ce605777SToby Isaac 198*ce605777SToby Isaac PetscFunctionBegin; 199*ce605777SToby Isaac ierr = MPI_Comm_size(map->comm, &size);CHKERRQ(ierr); 200*ce605777SToby Isaac ierr = MPI_Comm_rank(map->comm, &rank);CHKERRQ(ierr); 201*ce605777SToby Isaac ierr = PetscCommGetNewTag(map->comm, &firsttag);CHKERRQ(ierr); 202*ce605777SToby Isaac ierr = PetscCommGetNewTag(map->comm, &secondtag);CHKERRQ(ierr); 203*ce605777SToby Isaac myOffset = 0; 204*ce605777SToby Isaac ierr = PetscMalloc2(size, &firstreqs, size, &secondreqs);CHKERRQ(ierr); 205*ce605777SToby Isaac ierr = MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);CHKERRQ(ierr); 206*ce605777SToby Isaac myOffset = nextOffset - n; 207*ce605777SToby Isaac total = map->range[rank + 1] - map->range[rank]; 208*ce605777SToby Isaac if (total > 0) { 209*ce605777SToby Isaac ierr = MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);CHKERRQ(ierr); 210*ce605777SToby Isaac } 211*ce605777SToby Isaac for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) { 212*ce605777SToby Isaac PetscInt itotal; 213*ce605777SToby Isaac PetscInt overlap, oStart, oEnd; 214*ce605777SToby Isaac 215*ce605777SToby Isaac itotal = map->range[i + 1] - map->range[i]; 216*ce605777SToby Isaac if (itotal <= 0) continue; 217*ce605777SToby Isaac oStart = PetscMax(myOffset, map->range[i]); 218*ce605777SToby Isaac oEnd = PetscMin(nextOffset, map->range[i + 1]); 219*ce605777SToby Isaac overlap = oEnd - oStart; 220*ce605777SToby Isaac if (map->range[i] >= myOffset && map->range[i] < nextOffset) { 221*ce605777SToby Isaac /* send first message */ 222*ce605777SToby Isaac ierr = MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));CHKERRQ(ierr); 223*ce605777SToby Isaac } else if (overlap > 0) { 224*ce605777SToby Isaac /* send second message */ 225*ce605777SToby Isaac ierr = MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRQ(ierr); 226*ce605777SToby Isaac } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) { 227*ce605777SToby Isaac /* send empty second message */ 228*ce605777SToby Isaac ierr = MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRQ(ierr); 229*ce605777SToby Isaac } 230*ce605777SToby Isaac } 231*ce605777SToby Isaac filled = 0; 232*ce605777SToby Isaac sender = -1; 233*ce605777SToby Isaac if (total > 0) { 234*ce605777SToby Isaac ierr = MPI_Wait(&firstreqrcv, &firststatus);CHKERRQ(ierr); 235*ce605777SToby Isaac sender = firststatus.MPI_SOURCE; 236*ce605777SToby Isaac ierr = MPI_Get_count(&firststatus, MPIU_INT, &filled);CHKERRQ(ierr); 237*ce605777SToby Isaac } 238*ce605777SToby Isaac while (filled < total) { 239*ce605777SToby Isaac PetscMPIInt mfilled; 240*ce605777SToby Isaac MPI_Status stat; 241*ce605777SToby Isaac 242*ce605777SToby Isaac sender++; 243*ce605777SToby Isaac ierr = MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);CHKERRQ(ierr); 244*ce605777SToby Isaac ierr = MPI_Get_count(&stat, MPIU_INT, &mfilled);CHKERRQ(ierr); 245*ce605777SToby Isaac filled += mfilled; 246*ce605777SToby Isaac } 247*ce605777SToby Isaac ierr = MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 248*ce605777SToby Isaac ierr = MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 249*ce605777SToby Isaac ierr = PetscFree2(firstreqs, secondreqs);CHKERRQ(ierr); 250*ce605777SToby Isaac PetscFunctionReturn(0); 251*ce605777SToby Isaac } 252*ce605777SToby Isaac 253*ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 254*ce605777SToby Isaac { 255*ce605777SToby Isaac PetscMPIInt size, rank; 256*ce605777SToby Isaac PetscInt *pivots, *buffer; 257*ce605777SToby Isaac PetscInt i, j; 258*ce605777SToby Isaac PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv; 259*ce605777SToby Isaac PetscErrorCode ierr; 260*ce605777SToby Isaac 261*ce605777SToby Isaac PetscFunctionBegin; 262*ce605777SToby Isaac ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr); 263*ce605777SToby Isaac ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRQ(ierr); 264*ce605777SToby Isaac ierr = PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);CHKERRQ(ierr); 265*ce605777SToby Isaac /* sort locally */ 266*ce605777SToby Isaac ierr = PetscSortInt(mapin->n, keysin);CHKERRQ(ierr); 267*ce605777SToby Isaac /* get P - 1 pivots */ 268*ce605777SToby Isaac ierr = PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);CHKERRQ(ierr); 269*ce605777SToby Isaac /* determine which entries in the sorted array go to which other processes based on the pivots */ 270*ce605777SToby Isaac for (i = 0, j = 0; i < size - 1; i++) { 271*ce605777SToby Isaac PetscInt prev = j; 272*ce605777SToby Isaac 273*ce605777SToby Isaac while ((j < mapin->n) && (keysin[j] < pivots[i])) j++; 274*ce605777SToby Isaac offsets_snd[i] = prev; 275*ce605777SToby Isaac keys_per_snd[i] = j - prev; 276*ce605777SToby Isaac } 277*ce605777SToby Isaac offsets_snd[size - 1] = j; 278*ce605777SToby Isaac keys_per_snd[size - 1] = mapin->n - j; 279*ce605777SToby Isaac offsets_snd[size] = mapin->n; 280*ce605777SToby Isaac /* get the incoming sizes */ 281*ce605777SToby Isaac ierr = MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);CHKERRQ(ierr); 282*ce605777SToby Isaac offsets_rcv[0] = 0; 283*ce605777SToby Isaac for (i = 0; i < size; i++) { 284*ce605777SToby Isaac offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i]; 285*ce605777SToby Isaac } 286*ce605777SToby Isaac nrecv = offsets_rcv[size]; 287*ce605777SToby Isaac /* all to all exchange */ 288*ce605777SToby Isaac ierr = PetscMalloc1(nrecv, &buffer);CHKERRQ(ierr); 289*ce605777SToby Isaac ierr = MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);CHKERRQ(ierr); 290*ce605777SToby Isaac ierr = PetscFree(pivots);CHKERRQ(ierr); 291*ce605777SToby Isaac ierr = PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);CHKERRQ(ierr); 292*ce605777SToby Isaac 293*ce605777SToby Isaac /* local sort */ 294*ce605777SToby Isaac ierr = PetscSortInt(nrecv, buffer);CHKERRQ(ierr); 295*ce605777SToby Isaac #if defined(PETSC_USE_DEBUG) 296*ce605777SToby Isaac { 297*ce605777SToby Isaac PetscBool sorted; 298*ce605777SToby Isaac 299*ce605777SToby Isaac ierr = PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);CHKERRQ(ierr); 300*ce605777SToby Isaac if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");CHKERRQ(ierr); 301*ce605777SToby Isaac } 302*ce605777SToby Isaac #endif 303*ce605777SToby Isaac 304*ce605777SToby Isaac /* redistribute to the desired order */ 305*ce605777SToby Isaac ierr = PetscParallelRedistribute(mapout, nrecv, buffer, keysout);CHKERRQ(ierr); 306*ce605777SToby Isaac ierr = PetscFree(buffer);CHKERRQ(ierr); 307*ce605777SToby Isaac PetscFunctionReturn(0); 308*ce605777SToby Isaac } 309*ce605777SToby Isaac 310*ce605777SToby Isaac /*@ 311*ce605777SToby Isaac PetscParallelSortInt - Globally sort a distrbuted array of integers 312*ce605777SToby Isaac 313*ce605777SToby Isaac Collective 314*ce605777SToby Isaac 315*ce605777SToby Isaac Input Parameters: 316*ce605777SToby Isaac + mapin - PetscLayout describing the distribution of the input keys 317*ce605777SToby Isaac . mapout - PetscLayout describing the distribution of the output keys 318*ce605777SToby Isaac - keysin - the pre-sorted array of integers 319*ce605777SToby Isaac 320*ce605777SToby Isaac Output Parameters: 321*ce605777SToby Isaac . keysout - the array in which the sorted integers will be stored. If mapin == mapout, then keysin may be equal to keysout. 322*ce605777SToby Isaac 323*ce605777SToby Isaac Level: developer 324*ce605777SToby Isaac 325*ce605777SToby Isaac Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does: 326*ce605777SToby Isaac 327*ce605777SToby Isaac - sorts locally 328*ce605777SToby Isaac - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those 329*ce605777SToby Isaac - using to the pivots to repartition the keys by all-to-all exchange 330*ce605777SToby Isaac - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout) 331*ce605777SToby Isaac - redistributing to match the mapout layout 332*ce605777SToby Isaac 333*ce605777SToby Isaac If keysin != keysout, then keysin will not be changed during PetscParallelSortInt. 334*ce605777SToby Isaac 335*ce605777SToby Isaac .seealso: PetscParallelSortedInt() 336*ce605777SToby Isaac @*/ 337*ce605777SToby Isaac PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 338*ce605777SToby Isaac { 339*ce605777SToby Isaac PetscMPIInt size; 340*ce605777SToby Isaac PetscMPIInt result; 341*ce605777SToby Isaac PetscInt *keysincopy = NULL; 342*ce605777SToby Isaac PetscErrorCode ierr; 343*ce605777SToby Isaac 344*ce605777SToby Isaac PetscFunctionBegin; 345*ce605777SToby Isaac PetscValidPointer(mapin, 1); 346*ce605777SToby Isaac PetscValidPointer(mapout, 2); 347*ce605777SToby Isaac ierr = MPI_Comm_compare(mapin->comm, mapout->comm, &result);CHKERRQ(ierr); 348*ce605777SToby Isaac if (result != MPI_IDENT && result != MPI_CONGRUENT) SETERRQ(mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator"); 349*ce605777SToby Isaac ierr = PetscLayoutSetUp(mapin);CHKERRQ(ierr); 350*ce605777SToby Isaac ierr = PetscLayoutSetUp(mapout);CHKERRQ(ierr); 351*ce605777SToby Isaac if (mapin->n) PetscValidIntPointer(keysin, 3); 352*ce605777SToby Isaac if (mapout->n) PetscValidIntPointer(keysout, 4); 353*ce605777SToby Isaac if (mapin->N != mapout->N) SETERRQ2(mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%D != %D)", mapin->N, mapout->N); 354*ce605777SToby Isaac ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr); 355*ce605777SToby Isaac if (size == 1) { 356*ce605777SToby Isaac if (keysout != keysin) { 357*ce605777SToby Isaac ierr = PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr); 358*ce605777SToby Isaac } 359*ce605777SToby Isaac ierr = PetscSortInt(mapout->n, keysout);CHKERRQ(ierr); 360*ce605777SToby Isaac if (size == 1) PetscFunctionReturn(0); 361*ce605777SToby Isaac } 362*ce605777SToby Isaac if (keysout != keysin) { 363*ce605777SToby Isaac ierr = PetscMalloc1(mapin->n, &keysincopy);CHKERRQ(ierr); 364*ce605777SToby Isaac ierr = PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr); 365*ce605777SToby Isaac keysin = keysincopy; 366*ce605777SToby Isaac } 367*ce605777SToby Isaac ierr = PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);CHKERRQ(ierr); 368*ce605777SToby Isaac #if defined(PETSC_USE_DEBUG) 369*ce605777SToby Isaac { 370*ce605777SToby Isaac PetscBool sorted; 371*ce605777SToby Isaac 372*ce605777SToby Isaac ierr = PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);CHKERRQ(ierr); 373*ce605777SToby Isaac if (!sorted) SETERRQ(mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");CHKERRQ(ierr); 374*ce605777SToby Isaac } 375*ce605777SToby Isaac #endif 376*ce605777SToby Isaac ierr = PetscFree(keysincopy);CHKERRQ(ierr); 377*ce605777SToby Isaac PetscFunctionReturn(0); 378*ce605777SToby Isaac } 379