xref: /petsc/src/vec/is/utils/psort.c (revision e33f79d88bab5f9023ea6be42dc5dbb69a57d50a)
1ce605777SToby Isaac 
2ce605777SToby Isaac #include <petsc/private/petscimpl.h>
3579e05a6SToby Isaac #include <petscis.h> /*I "petscis.h" I*/
4ce605777SToby Isaac 
5ce605777SToby Isaac /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
6d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
7d71ae5a4SJacob Faibussowitsch {
8ce605777SToby Isaac   PetscInt diff;
9ce605777SToby Isaac   PetscInt split, mid, partner;
10ce605777SToby Isaac 
11ce605777SToby Isaac   PetscFunctionBegin;
12ce605777SToby Isaac   diff = rankEnd - rankStart;
133ba16761SJacob Faibussowitsch   if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
14ce605777SToby Isaac   if (diff == 1) {
15ce605777SToby Isaac     if (forward) {
169566063dSJacob Faibussowitsch       PetscCall(PetscSortInt((PetscInt)n, keys));
17ce605777SToby Isaac     } else {
189566063dSJacob Faibussowitsch       PetscCall(PetscSortReverseInt((PetscInt)n, keys));
19ce605777SToby Isaac     }
203ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
21ce605777SToby Isaac   }
22ce605777SToby Isaac   split = 1;
23ce605777SToby Isaac   while (2 * split < diff) split *= 2;
24ce605777SToby Isaac   mid = rankStart + split;
25ce605777SToby Isaac   if (rank < mid) {
26ce605777SToby Isaac     partner = rank + split;
27ce605777SToby Isaac   } else {
28ce605777SToby Isaac     partner = rank - split;
29ce605777SToby Isaac   }
30ce605777SToby Isaac   if (partner < rankEnd) {
31ce605777SToby Isaac     PetscMPIInt i;
32ce605777SToby Isaac 
339566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
34ce605777SToby Isaac     if ((rank < partner) == (forward == PETSC_TRUE)) {
35ad540459SPierre Jolivet       for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
36ce605777SToby Isaac     } else {
37ad540459SPierre Jolivet       for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
38ce605777SToby Isaac     }
39ce605777SToby Isaac   }
40ce605777SToby Isaac   /* divide and conquer */
41ce605777SToby Isaac   if (rank < mid) {
429566063dSJacob Faibussowitsch     PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
43ce605777SToby Isaac   } else {
449566063dSJacob Faibussowitsch     PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
45ce605777SToby Isaac   }
463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
47ce605777SToby Isaac }
48ce605777SToby Isaac 
49ce605777SToby Isaac /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
50d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
51d71ae5a4SJacob Faibussowitsch {
52ce605777SToby Isaac   PetscInt diff;
53ce605777SToby Isaac   PetscInt mid;
54ce605777SToby Isaac 
55ce605777SToby Isaac   PetscFunctionBegin;
56ce605777SToby Isaac   diff = rankEnd - rankStart;
573ba16761SJacob Faibussowitsch   if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
58ce605777SToby Isaac   if (diff == 1) {
59ce605777SToby Isaac     if (forward) {
609566063dSJacob Faibussowitsch       PetscCall(PetscSortInt(n, keys));
61ce605777SToby Isaac     } else {
629566063dSJacob Faibussowitsch       PetscCall(PetscSortReverseInt(n, keys));
63ce605777SToby Isaac     }
643ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
65ce605777SToby Isaac   }
66ce605777SToby Isaac   mid = rankStart + diff / 2;
67ce605777SToby Isaac   /* divide and conquer */
68ce605777SToby Isaac   if (rank < mid) {
699566063dSJacob Faibussowitsch     PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
70ce605777SToby Isaac   } else {
719566063dSJacob Faibussowitsch     PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
72ce605777SToby Isaac   }
73ce605777SToby Isaac   /* bitonic merge */
749566063dSJacob Faibussowitsch   PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
76ce605777SToby Isaac }
77ce605777SToby Isaac 
78d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
79d71ae5a4SJacob Faibussowitsch {
80ce605777SToby Isaac   PetscMPIInt size, rank, tag, mpin;
81ce605777SToby Isaac   PetscInt   *buffer;
82ce605777SToby Isaac 
83ce605777SToby Isaac   PetscFunctionBegin;
84ce605777SToby Isaac   PetscValidIntPointer(keys, 3);
859566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(comm, &tag));
869566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
879566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
889566063dSJacob Faibussowitsch   PetscCall(PetscMPIIntCast(n, &mpin));
899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &buffer));
909566063dSJacob Faibussowitsch   PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
919566063dSJacob Faibussowitsch   PetscCall(PetscFree(buffer));
923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
93ce605777SToby Isaac }
94ce605777SToby Isaac 
95d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
96d71ae5a4SJacob Faibussowitsch {
97ce605777SToby Isaac   PetscMPIInt  size, rank;
98ce605777SToby Isaac   PetscInt    *pivots, *finalpivots, i;
99ce605777SToby Isaac   PetscInt     non_empty, my_first, count;
100ce605777SToby Isaac   PetscMPIInt *keys_per, max_keys_per;
101ce605777SToby Isaac 
102ce605777SToby Isaac   PetscFunctionBegin;
1039566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
1049566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
105ce605777SToby Isaac 
106ce605777SToby Isaac   /* Choose P - 1 pivots that would be ideal for the distribution on this process */
1079566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size - 1, &pivots));
108ce605777SToby Isaac   for (i = 0; i < size - 1; i++) {
109ce605777SToby Isaac     PetscInt index;
110ce605777SToby Isaac 
111ce605777SToby Isaac     if (!mapin->n) {
112ce605777SToby Isaac       /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
113ce605777SToby Isaac        * processes are in the layout, so those values will be ignored from the end of the sorted
114ce605777SToby Isaac        * pivots */
115ce605777SToby Isaac       pivots[i] = PETSC_MAX_INT;
116ce605777SToby Isaac     } else {
117ce605777SToby Isaac       /* match the distribution to the desired output described by mapout by getting the index
118ce605777SToby Isaac        * that is approximately the appropriate fraction through the list */
119ce605777SToby Isaac       index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
120ce605777SToby Isaac       index     = PetscMin(index, (mapin->n - 1));
121ce605777SToby Isaac       index     = PetscMax(index, 0);
122ce605777SToby Isaac       pivots[i] = keysin[index];
123ce605777SToby Isaac     }
124ce605777SToby Isaac   }
125ce605777SToby Isaac   /* sort the pivots in parallel */
1269566063dSJacob Faibussowitsch   PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
12776bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
128ce605777SToby Isaac     PetscBool sorted;
129ce605777SToby Isaac 
1309566063dSJacob Faibussowitsch     PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
13128b400f6SJacob Faibussowitsch     PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
132ce605777SToby Isaac   }
133ce605777SToby Isaac 
134ce605777SToby Isaac   /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
135ce605777SToby Isaac    * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
136ce605777SToby Isaac   non_empty = size;
1379371c9d4SSatish Balay   for (i = 0; i < size; i++)
1389371c9d4SSatish Balay     if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
1399566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size + 1, &keys_per));
140ce605777SToby Isaac   my_first = -1;
141ce605777SToby Isaac   if (non_empty) {
142ce605777SToby Isaac     for (i = 0; i < size - 1; i++) {
143ce605777SToby Isaac       size_t sample      = (i + 1) * non_empty - 1;
144ce605777SToby Isaac       size_t sample_rank = sample / (size - 1);
145ce605777SToby Isaac 
146ce605777SToby Isaac       keys_per[sample_rank]++;
147ad540459SPierre Jolivet       if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
148ce605777SToby Isaac     }
149ce605777SToby Isaac   }
150ce605777SToby Isaac   for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
1519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
152ce605777SToby Isaac   /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
153ce605777SToby Isaac    * and allgather them */
154ce605777SToby Isaac   for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
155ce605777SToby Isaac   for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
1569566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
157ce605777SToby Isaac   for (i = 0, count = 0; i < size; i++) {
158ce605777SToby Isaac     PetscInt j;
159ce605777SToby Isaac 
160ce605777SToby Isaac     for (j = 0; j < max_keys_per; j++) {
161ad540459SPierre Jolivet       if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
162ce605777SToby Isaac     }
163ce605777SToby Isaac   }
164ce605777SToby Isaac   *outpivots = finalpivots;
1659566063dSJacob Faibussowitsch   PetscCall(PetscFree(keys_per));
1669566063dSJacob Faibussowitsch   PetscCall(PetscFree(pivots));
1673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
168ce605777SToby Isaac }
169ce605777SToby Isaac 
170d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
171d71ae5a4SJacob Faibussowitsch {
172ce605777SToby Isaac   PetscMPIInt  size, rank;
173ce605777SToby Isaac   PetscInt     myOffset, nextOffset;
174ce605777SToby Isaac   PetscInt     i;
175ce605777SToby Isaac   PetscMPIInt  total, filled;
176ce605777SToby Isaac   PetscMPIInt  sender, nfirst, nsecond;
177ce605777SToby Isaac   PetscMPIInt  firsttag, secondtag;
178ce605777SToby Isaac   MPI_Request  firstreqrcv;
179ce605777SToby Isaac   MPI_Request *firstreqs;
180ce605777SToby Isaac   MPI_Request *secondreqs;
181ce605777SToby Isaac   MPI_Status   firststatus;
182ce605777SToby Isaac 
183ce605777SToby Isaac   PetscFunctionBegin;
1849566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1859566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
1869566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
1879566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
188ce605777SToby Isaac   myOffset = 0;
1899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
1909566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
191ce605777SToby Isaac   myOffset = nextOffset - n;
192ce605777SToby Isaac   total    = map->range[rank + 1] - map->range[rank];
19348a46eb9SPierre Jolivet   if (total > 0) PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
194ce605777SToby Isaac   for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
195ce605777SToby Isaac     PetscInt itotal;
196ce605777SToby Isaac     PetscInt overlap, oStart, oEnd;
197ce605777SToby Isaac 
198ce605777SToby Isaac     itotal = map->range[i + 1] - map->range[i];
199ce605777SToby Isaac     if (itotal <= 0) continue;
200ce605777SToby Isaac     oStart  = PetscMax(myOffset, map->range[i]);
201ce605777SToby Isaac     oEnd    = PetscMin(nextOffset, map->range[i + 1]);
202ce605777SToby Isaac     overlap = oEnd - oStart;
203ce605777SToby Isaac     if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
204ce605777SToby Isaac       /* send first message */
2059566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++])));
206ce605777SToby Isaac     } else if (overlap > 0) {
207ce605777SToby Isaac       /* send second message */
2089566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
209ce605777SToby Isaac     } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
210ce605777SToby Isaac       /* send empty second message */
2119566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
212ce605777SToby Isaac     }
213ce605777SToby Isaac   }
214ce605777SToby Isaac   filled = 0;
215ce605777SToby Isaac   sender = -1;
216ce605777SToby Isaac   if (total > 0) {
2179566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
218ce605777SToby Isaac     sender = firststatus.MPI_SOURCE;
2199566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
220ce605777SToby Isaac   }
221ce605777SToby Isaac   while (filled < total) {
222ce605777SToby Isaac     PetscMPIInt mfilled;
223ce605777SToby Isaac     MPI_Status  stat;
224ce605777SToby Isaac 
225ce605777SToby Isaac     sender++;
2269566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
2279566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
228ce605777SToby Isaac     filled += mfilled;
229ce605777SToby Isaac   }
2309566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
2319566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
2329566063dSJacob Faibussowitsch   PetscCall(PetscFree2(firstreqs, secondreqs));
2333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
234ce605777SToby Isaac }
235ce605777SToby Isaac 
236d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
237d71ae5a4SJacob Faibussowitsch {
238ce605777SToby Isaac   PetscMPIInt  size, rank;
2396df2664eSToby Isaac   PetscInt    *pivots = NULL, *buffer;
240ce605777SToby Isaac   PetscInt     i, j;
241ce605777SToby Isaac   PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;
242ce605777SToby Isaac 
243ce605777SToby Isaac   PetscFunctionBegin;
2449566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
2459566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
2469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
247ce605777SToby Isaac   /* sort locally */
2489566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(mapin->n, keysin));
249ce605777SToby Isaac   /* get P - 1 pivots */
2509566063dSJacob Faibussowitsch   PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
251ce605777SToby Isaac   /* determine which entries in the sorted array go to which other processes based on the pivots */
252ce605777SToby Isaac   for (i = 0, j = 0; i < size - 1; i++) {
253ce605777SToby Isaac     PetscInt prev = j;
254ce605777SToby Isaac 
255ce605777SToby Isaac     while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
256ce605777SToby Isaac     offsets_snd[i]  = prev;
257ce605777SToby Isaac     keys_per_snd[i] = j - prev;
258ce605777SToby Isaac   }
259ce605777SToby Isaac   offsets_snd[size - 1]  = j;
260ce605777SToby Isaac   keys_per_snd[size - 1] = mapin->n - j;
261ce605777SToby Isaac   offsets_snd[size]      = mapin->n;
262ce605777SToby Isaac   /* get the incoming sizes */
2639566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
264ce605777SToby Isaac   offsets_rcv[0] = 0;
265ad540459SPierre Jolivet   for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
266ce605777SToby Isaac   nrecv = offsets_rcv[size];
267ce605777SToby Isaac   /* all to all exchange */
2689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrecv, &buffer));
2699566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
2709566063dSJacob Faibussowitsch   PetscCall(PetscFree(pivots));
2719566063dSJacob Faibussowitsch   PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));
272ce605777SToby Isaac 
273ce605777SToby Isaac   /* local sort */
2749566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(nrecv, buffer));
275ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
276ce605777SToby Isaac   {
277ce605777SToby Isaac     PetscBool sorted;
278ce605777SToby Isaac 
2799566063dSJacob Faibussowitsch     PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
28028b400f6SJacob Faibussowitsch     PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
281ce605777SToby Isaac   }
282ce605777SToby Isaac #endif
283ce605777SToby Isaac 
284ce605777SToby Isaac   /* redistribute to the desired order */
2859566063dSJacob Faibussowitsch   PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
2869566063dSJacob Faibussowitsch   PetscCall(PetscFree(buffer));
2873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
288ce605777SToby Isaac }
289ce605777SToby Isaac 
290ce605777SToby Isaac /*@
291064ec1f3Sprj-   PetscParallelSortInt - Globally sort a distributed array of integers
292ce605777SToby Isaac 
293ce605777SToby Isaac   Collective
294ce605777SToby Isaac 
295ce605777SToby Isaac   Input Parameters:
296cab54364SBarry Smith + mapin  - `PetscLayout` describing the distribution of the input keys
297cab54364SBarry Smith . mapout - `PetscLayout` describing the desired distribution of the output keys
298ce605777SToby Isaac - keysin - the pre-sorted array of integers
299ce605777SToby Isaac 
300064ec1f3Sprj-   Output Parameter:
301ce605777SToby Isaac . keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.
302ce605777SToby Isaac 
303ce605777SToby Isaac   Level: developer
304ce605777SToby Isaac 
305cab54364SBarry Smith   Notes:
30638b5cf2dSJacob Faibussowitsch 
307*e33f79d8SJacob Faibussowitsch   This implements a distributed samplesort, which, with local array sizes n_in and n_out,
308*e33f79d8SJacob Faibussowitsch   global size N, and global number of MPI processes P, does\:
309cab54364SBarry Smith .vb
310ce605777SToby Isaac   - sorts locally
311ce605777SToby Isaac   - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
312ce605777SToby Isaac   - using to the pivots to repartition the keys by all-to-all exchange
313ce605777SToby Isaac   - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
314ce605777SToby Isaac   - redistributing to match the mapout layout
315cab54364SBarry Smith .ve
316ce605777SToby Isaac 
317*e33f79d8SJacob Faibussowitsch   If `keysin` != `keysout`, then `keysin` will not be changed during `PetscParallelSortInt()`.
318ce605777SToby Isaac 
319cab54364SBarry Smith .seealso: `PetscSortInt()`, `PetscParallelSortedInt()`
320ce605777SToby Isaac @*/
321d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
322d71ae5a4SJacob Faibussowitsch {
323ce605777SToby Isaac   PetscMPIInt size;
324ce605777SToby Isaac   PetscMPIInt result;
325ce605777SToby Isaac   PetscInt   *keysincopy = NULL;
326ce605777SToby Isaac 
327ce605777SToby Isaac   PetscFunctionBegin;
328ce605777SToby Isaac   PetscValidPointer(mapin, 1);
329ce605777SToby Isaac   PetscValidPointer(mapout, 2);
3309566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
331c9cc58a2SBarry Smith   PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
3329566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mapin));
3339566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mapout));
334ce605777SToby Isaac   if (mapin->n) PetscValidIntPointer(keysin, 3);
335ce605777SToby Isaac   if (mapout->n) PetscValidIntPointer(keysout, 4);
33608401ef6SPierre Jolivet   PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
3379566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
338ce605777SToby Isaac   if (size == 1) {
33948a46eb9SPierre Jolivet     if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
3409566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(mapout->n, keysout));
3413ba16761SJacob Faibussowitsch     if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
342ce605777SToby Isaac   }
343ce605777SToby Isaac   if (keysout != keysin) {
3449566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(mapin->n, &keysincopy));
3459566063dSJacob Faibussowitsch     PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
346ce605777SToby Isaac     keysin = keysincopy;
347ce605777SToby Isaac   }
3489566063dSJacob Faibussowitsch   PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
349ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
350ce605777SToby Isaac   {
351ce605777SToby Isaac     PetscBool sorted;
352ce605777SToby Isaac 
3539566063dSJacob Faibussowitsch     PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
35428b400f6SJacob Faibussowitsch     PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
355ce605777SToby Isaac   }
356ce605777SToby Isaac #endif
3579566063dSJacob Faibussowitsch   PetscCall(PetscFree(keysincopy));
3583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
359ce605777SToby Isaac }
360