1ce605777SToby Isaac #include <petsc/private/petscimpl.h> 2579e05a6SToby Isaac #include <petscis.h> /*I "petscis.h" I*/ 3ce605777SToby Isaac 4ce605777SToby Isaac /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 5d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 6d71ae5a4SJacob Faibussowitsch { 7ce605777SToby Isaac PetscInt diff; 86497c311SBarry Smith PetscMPIInt split, mid, partner; 9ce605777SToby Isaac 10ce605777SToby Isaac PetscFunctionBegin; 11ce605777SToby Isaac diff = rankEnd - rankStart; 123ba16761SJacob Faibussowitsch if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS); 13ce605777SToby Isaac if (diff == 1) { 14ce605777SToby Isaac if (forward) { 156497c311SBarry Smith PetscCall(PetscSortInt(n, keys)); 16ce605777SToby Isaac } else { 176497c311SBarry Smith PetscCall(PetscSortReverseInt(n, keys)); 18ce605777SToby Isaac } 193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20ce605777SToby Isaac } 21ce605777SToby Isaac split = 1; 22ce605777SToby Isaac while (2 * split < diff) split *= 2; 23ce605777SToby Isaac mid = rankStart + split; 24ce605777SToby Isaac if (rank < mid) { 25ce605777SToby Isaac partner = rank + split; 26ce605777SToby Isaac } else { 27ce605777SToby Isaac partner = rank - split; 28ce605777SToby Isaac } 29ce605777SToby Isaac if (partner < rankEnd) { 30ce605777SToby Isaac PetscMPIInt i; 31ce605777SToby Isaac 329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE)); 33ce605777SToby Isaac if ((rank < partner) == (forward == PETSC_TRUE)) { 34ad540459SPierre Jolivet for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i]; 35ce605777SToby Isaac } else { 36ad540459SPierre Jolivet for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i]; 37ce605777SToby Isaac } 38ce605777SToby Isaac } 39ce605777SToby Isaac /* divide and conquer */ 40ce605777SToby Isaac if (rank < mid) { 419566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward)); 42ce605777SToby Isaac } else { 439566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 44ce605777SToby Isaac } 453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 46ce605777SToby Isaac } 47ce605777SToby Isaac 48ce605777SToby Isaac /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 49d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 50d71ae5a4SJacob Faibussowitsch { 516497c311SBarry Smith PetscMPIInt diff, mid; 52ce605777SToby Isaac 53ce605777SToby Isaac PetscFunctionBegin; 54ce605777SToby Isaac diff = rankEnd - rankStart; 553ba16761SJacob Faibussowitsch if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS); 56ce605777SToby Isaac if (diff == 1) { 57ce605777SToby Isaac if (forward) { 589566063dSJacob Faibussowitsch PetscCall(PetscSortInt(n, keys)); 59ce605777SToby Isaac } else { 609566063dSJacob Faibussowitsch PetscCall(PetscSortReverseInt(n, keys)); 61ce605777SToby Isaac } 623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 63ce605777SToby Isaac } 64ce605777SToby Isaac mid = rankStart + diff / 2; 65ce605777SToby Isaac /* divide and conquer */ 66ce605777SToby Isaac if (rank < mid) { 679566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward)); 68ce605777SToby Isaac } else { 699566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 70ce605777SToby Isaac } 71ce605777SToby Isaac /* bitonic merge */ 729566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward)); 733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 74ce605777SToby Isaac } 75ce605777SToby Isaac 76d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[]) 77d71ae5a4SJacob Faibussowitsch { 78ce605777SToby Isaac PetscMPIInt size, rank, tag, mpin; 79ce605777SToby Isaac PetscInt *buffer; 80ce605777SToby Isaac 81ce605777SToby Isaac PetscFunctionBegin; 824f572ea9SToby Isaac PetscAssertPointer(keys, 3); 839566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(comm, &tag)); 849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 869566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(n, &mpin)); 879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &buffer)); 889566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE)); 899566063dSJacob Faibussowitsch PetscCall(PetscFree(buffer)); 903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 91ce605777SToby Isaac } 92ce605777SToby Isaac 93d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[]) 94d71ae5a4SJacob Faibussowitsch { 95ce605777SToby Isaac PetscMPIInt size, rank; 96ce605777SToby Isaac PetscInt *pivots, *finalpivots, i; 97ce605777SToby Isaac PetscInt non_empty, my_first, count; 98ce605777SToby Isaac PetscMPIInt *keys_per, max_keys_per; 99ce605777SToby Isaac 100ce605777SToby Isaac PetscFunctionBegin; 1019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 1029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank)); 103ce605777SToby Isaac 104ce605777SToby Isaac /* Choose P - 1 pivots that would be ideal for the distribution on this process */ 1059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size - 1, &pivots)); 106ce605777SToby Isaac for (i = 0; i < size - 1; i++) { 107ce605777SToby Isaac PetscInt index; 108ce605777SToby Isaac 109ce605777SToby Isaac if (!mapin->n) { 110ce605777SToby Isaac /* if this rank is empty, put "infinity" in the list. each process knows how many empty 111ce605777SToby Isaac * processes are in the layout, so those values will be ignored from the end of the sorted 112ce605777SToby Isaac * pivots */ 113*1690c2aeSBarry Smith pivots[i] = PETSC_INT_MAX; 114ce605777SToby Isaac } else { 115ce605777SToby Isaac /* match the distribution to the desired output described by mapout by getting the index 116ce605777SToby Isaac * that is approximately the appropriate fraction through the list */ 117ce605777SToby Isaac index = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N); 118ce605777SToby Isaac index = PetscMin(index, (mapin->n - 1)); 119ce605777SToby Isaac index = PetscMax(index, 0); 120ce605777SToby Isaac pivots[i] = keysin[index]; 121ce605777SToby Isaac } 122ce605777SToby Isaac } 123ce605777SToby Isaac /* sort the pivots in parallel */ 1249566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots)); 12576bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 126ce605777SToby Isaac PetscBool sorted; 127ce605777SToby Isaac 1289566063dSJacob Faibussowitsch PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted)); 12928b400f6SJacob Faibussowitsch PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed"); 130ce605777SToby Isaac } 131ce605777SToby Isaac 132ce605777SToby Isaac /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select 133ce605777SToby Isaac * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */ 134ce605777SToby Isaac non_empty = size; 1359371c9d4SSatish Balay for (i = 0; i < size; i++) 1369371c9d4SSatish Balay if (mapout->range[i] == mapout->range[i + 1]) non_empty--; 1379566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size + 1, &keys_per)); 138ce605777SToby Isaac my_first = -1; 139ce605777SToby Isaac if (non_empty) { 140ce605777SToby Isaac for (i = 0; i < size - 1; i++) { 141ce605777SToby Isaac size_t sample = (i + 1) * non_empty - 1; 142ce605777SToby Isaac size_t sample_rank = sample / (size - 1); 143ce605777SToby Isaac 144ce605777SToby Isaac keys_per[sample_rank]++; 145ad540459SPierre Jolivet if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1)); 146ce605777SToby Isaac } 147ce605777SToby Isaac } 148ce605777SToby Isaac for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per); 1499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots)); 150ce605777SToby Isaac /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array 151ce605777SToby Isaac * and allgather them */ 152ce605777SToby Isaac for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty]; 153*1690c2aeSBarry Smith for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_INT_MAX; 1549566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm)); 155ce605777SToby Isaac for (i = 0, count = 0; i < size; i++) { 156ce605777SToby Isaac PetscInt j; 157ce605777SToby Isaac 158ce605777SToby Isaac for (j = 0; j < max_keys_per; j++) { 159ad540459SPierre Jolivet if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j]; 160ce605777SToby Isaac } 161ce605777SToby Isaac } 162ce605777SToby Isaac *outpivots = finalpivots; 1639566063dSJacob Faibussowitsch PetscCall(PetscFree(keys_per)); 1649566063dSJacob Faibussowitsch PetscCall(PetscFree(pivots)); 1653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 166ce605777SToby Isaac } 167ce605777SToby Isaac 168d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[]) 169d71ae5a4SJacob Faibussowitsch { 170ce605777SToby Isaac PetscMPIInt size, rank; 171ce605777SToby Isaac PetscInt myOffset, nextOffset; 1726497c311SBarry Smith PetscCount total, filled; 173ce605777SToby Isaac PetscMPIInt sender, nfirst, nsecond; 174ce605777SToby Isaac PetscMPIInt firsttag, secondtag; 175ce605777SToby Isaac MPI_Request firstreqrcv; 176ce605777SToby Isaac MPI_Request *firstreqs; 177ce605777SToby Isaac MPI_Request *secondreqs; 178ce605777SToby Isaac 179ce605777SToby Isaac PetscFunctionBegin; 1809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1819566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(map->comm, &rank)); 1829566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(map->comm, &firsttag)); 1839566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(map->comm, &secondtag)); 184ce605777SToby Isaac myOffset = 0; 1859566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs)); 1869566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm)); 187ce605777SToby Isaac myOffset = nextOffset - n; 188ce605777SToby Isaac total = map->range[rank + 1] - map->range[rank]; 1896497c311SBarry Smith if (total > 0) PetscCallMPI(MPIU_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv)); 1906497c311SBarry Smith nsecond = 0; 1916497c311SBarry Smith nfirst = 0; 1926497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) { 193ce605777SToby Isaac PetscInt itotal; 194ce605777SToby Isaac PetscInt overlap, oStart, oEnd; 195ce605777SToby Isaac 196ce605777SToby Isaac itotal = map->range[i + 1] - map->range[i]; 197ce605777SToby Isaac if (itotal <= 0) continue; 198ce605777SToby Isaac oStart = PetscMax(myOffset, map->range[i]); 199ce605777SToby Isaac oEnd = PetscMin(nextOffset, map->range[i + 1]); 200ce605777SToby Isaac overlap = oEnd - oStart; 201ce605777SToby Isaac if (map->range[i] >= myOffset && map->range[i] < nextOffset) { 202ce605777SToby Isaac /* send first message */ 2036497c311SBarry Smith PetscCallMPI(MPIU_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &firstreqs[nfirst++])); 204ce605777SToby Isaac } else if (overlap > 0) { 205ce605777SToby Isaac /* send second message */ 2066497c311SBarry Smith PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++])); 207ce605777SToby Isaac } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) { 208ce605777SToby Isaac /* send empty second message */ 2096497c311SBarry Smith PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++])); 210ce605777SToby Isaac } 211ce605777SToby Isaac } 212ce605777SToby Isaac filled = 0; 213ce605777SToby Isaac sender = -1; 214ce605777SToby Isaac if (total > 0) { 2156497c311SBarry Smith MPI_Status status; 2166497c311SBarry Smith 2176497c311SBarry Smith PetscCallMPI(MPI_Wait(&firstreqrcv, &status)); 2186497c311SBarry Smith sender = status.MPI_SOURCE; 2196497c311SBarry Smith PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &filled)); 220ce605777SToby Isaac while (filled < total) { 2216497c311SBarry Smith PetscCount mfilled; 222ce605777SToby Isaac 223ce605777SToby Isaac sender++; 2246497c311SBarry Smith PetscCallMPI(MPIU_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &status)); 2256497c311SBarry Smith PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &mfilled)); 226ce605777SToby Isaac filled += mfilled; 227ce605777SToby Isaac } 2286497c311SBarry Smith } 2299566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE)); 2309566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE)); 2319566063dSJacob Faibussowitsch PetscCall(PetscFree2(firstreqs, secondreqs)); 2323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 233ce605777SToby Isaac } 234ce605777SToby Isaac 235d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 236d71ae5a4SJacob Faibussowitsch { 237ce605777SToby Isaac PetscMPIInt size, rank; 2386df2664eSToby Isaac PetscInt *pivots = NULL, *buffer; 2396497c311SBarry Smith PetscInt j; 240ce605777SToby Isaac PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv; 241ce605777SToby Isaac 242ce605777SToby Isaac PetscFunctionBegin; 2439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 2449566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank)); 2459566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv)); 246ce605777SToby Isaac /* sort locally */ 2479566063dSJacob Faibussowitsch PetscCall(PetscSortInt(mapin->n, keysin)); 248ce605777SToby Isaac /* get P - 1 pivots */ 2499566063dSJacob Faibussowitsch PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots)); 250ce605777SToby Isaac /* determine which entries in the sorted array go to which other processes based on the pivots */ 2516497c311SBarry Smith j = 0; 2526497c311SBarry Smith for (PetscMPIInt i = 0; i < size - 1; i++) { 253ce605777SToby Isaac PetscInt prev = j; 254ce605777SToby Isaac 255ce605777SToby Isaac while ((j < mapin->n) && (keysin[j] < pivots[i])) j++; 2566497c311SBarry Smith PetscCall(PetscMPIIntCast(prev, &offsets_snd[i])); 2576497c311SBarry Smith PetscCall(PetscMPIIntCast(j - prev, &keys_per_snd[i])); 258ce605777SToby Isaac } 2596497c311SBarry Smith PetscCall(PetscMPIIntCast(j, &offsets_snd[size - 1])); 2606497c311SBarry Smith PetscCall(PetscMPIIntCast(mapin->n - j, &keys_per_snd[size - 1])); 2616497c311SBarry Smith PetscCall(PetscMPIIntCast(mapin->n, &offsets_snd[size])); 262ce605777SToby Isaac /* get the incoming sizes */ 2639566063dSJacob Faibussowitsch PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm)); 264ce605777SToby Isaac offsets_rcv[0] = 0; 2656497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i]; 266ce605777SToby Isaac nrecv = offsets_rcv[size]; 267ce605777SToby Isaac /* all to all exchange */ 2689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrecv, &buffer)); 2699566063dSJacob Faibussowitsch PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm)); 2709566063dSJacob Faibussowitsch PetscCall(PetscFree(pivots)); 2719566063dSJacob Faibussowitsch PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv)); 272ce605777SToby Isaac 273ce605777SToby Isaac /* local sort */ 2749566063dSJacob Faibussowitsch PetscCall(PetscSortInt(nrecv, buffer)); 275ce605777SToby Isaac #if defined(PETSC_USE_DEBUG) 276ce605777SToby Isaac { 277ce605777SToby Isaac PetscBool sorted; 278ce605777SToby Isaac 2799566063dSJacob Faibussowitsch PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted)); 28028b400f6SJacob Faibussowitsch PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed"); 281ce605777SToby Isaac } 282ce605777SToby Isaac #endif 283ce605777SToby Isaac 284ce605777SToby Isaac /* redistribute to the desired order */ 2859566063dSJacob Faibussowitsch PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout)); 2869566063dSJacob Faibussowitsch PetscCall(PetscFree(buffer)); 2873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 288ce605777SToby Isaac } 289ce605777SToby Isaac 290ce605777SToby Isaac /*@ 291064ec1f3Sprj- PetscParallelSortInt - Globally sort a distributed array of integers 292ce605777SToby Isaac 293ce605777SToby Isaac Collective 294ce605777SToby Isaac 295ce605777SToby Isaac Input Parameters: 296cab54364SBarry Smith + mapin - `PetscLayout` describing the distribution of the input keys 297cab54364SBarry Smith . mapout - `PetscLayout` describing the desired distribution of the output keys 298ce605777SToby Isaac - keysin - the pre-sorted array of integers 299ce605777SToby Isaac 300064ec1f3Sprj- Output Parameter: 3016497c311SBarry Smith . keysout - the array in which the sorted integers will be stored. If `mapin` == `mapout`, then `keysin` may be equal to `keysout`. 302ce605777SToby Isaac 303ce605777SToby Isaac Level: developer 304ce605777SToby Isaac 305cab54364SBarry Smith Notes: 30638b5cf2dSJacob Faibussowitsch 307e33f79d8SJacob Faibussowitsch This implements a distributed samplesort, which, with local array sizes n_in and n_out, 308e33f79d8SJacob Faibussowitsch global size N, and global number of MPI processes P, does\: 309cab54364SBarry Smith .vb 310ce605777SToby Isaac - sorts locally 311ce605777SToby Isaac - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those 312ce605777SToby Isaac - using to the pivots to repartition the keys by all-to-all exchange 313ce605777SToby Isaac - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout) 314ce605777SToby Isaac - redistributing to match the mapout layout 315cab54364SBarry Smith .ve 316ce605777SToby Isaac 317e33f79d8SJacob Faibussowitsch If `keysin` != `keysout`, then `keysin` will not be changed during `PetscParallelSortInt()`. 318ce605777SToby Isaac 319cab54364SBarry Smith .seealso: `PetscSortInt()`, `PetscParallelSortedInt()` 320ce605777SToby Isaac @*/ 321d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 322d71ae5a4SJacob Faibussowitsch { 323ce605777SToby Isaac PetscMPIInt size; 324ce605777SToby Isaac PetscMPIInt result; 325ce605777SToby Isaac PetscInt *keysincopy = NULL; 326ce605777SToby Isaac 327ce605777SToby Isaac PetscFunctionBegin; 3284f572ea9SToby Isaac PetscAssertPointer(mapin, 1); 3294f572ea9SToby Isaac PetscAssertPointer(mapout, 2); 3309566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result)); 331c9cc58a2SBarry Smith PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator"); 3329566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mapin)); 3339566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mapout)); 3344f572ea9SToby Isaac if (mapin->n) PetscAssertPointer(keysin, 3); 3354f572ea9SToby Isaac if (mapout->n) PetscAssertPointer(keysout, 4); 33608401ef6SPierre Jolivet PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N); 3379566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 338ce605777SToby Isaac if (size == 1) { 33948a46eb9SPierre Jolivet if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt))); 3409566063dSJacob Faibussowitsch PetscCall(PetscSortInt(mapout->n, keysout)); 3413ba16761SJacob Faibussowitsch if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 342ce605777SToby Isaac } 343ce605777SToby Isaac if (keysout != keysin) { 3449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mapin->n, &keysincopy)); 3459566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt))); 346ce605777SToby Isaac keysin = keysincopy; 347ce605777SToby Isaac } 3489566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout)); 349ce605777SToby Isaac #if defined(PETSC_USE_DEBUG) 350ce605777SToby Isaac { 351ce605777SToby Isaac PetscBool sorted; 352ce605777SToby Isaac 3539566063dSJacob Faibussowitsch PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted)); 35428b400f6SJacob Faibussowitsch PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed"); 355ce605777SToby Isaac } 356ce605777SToby Isaac #endif 3579566063dSJacob Faibussowitsch PetscCall(PetscFree(keysincopy)); 3583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 359ce605777SToby Isaac } 360