xref: /petsc/src/vec/is/utils/psort.c (revision 1e1ea65d8de51fde77ce8a787efbef25e407badc)
1ce605777SToby Isaac 
2ce605777SToby Isaac #include <petsc/private/petscimpl.h>
3579e05a6SToby Isaac #include <petscis.h> /*I "petscis.h" I*/
4ce605777SToby Isaac 
5ce605777SToby Isaac /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
6ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
7ce605777SToby Isaac {
8ce605777SToby Isaac   PetscInt       diff;
9ce605777SToby Isaac   PetscInt       split, mid, partner;
10ce605777SToby Isaac   PetscErrorCode ierr;
11ce605777SToby Isaac 
12ce605777SToby Isaac   PetscFunctionBegin;
13ce605777SToby Isaac   diff = rankEnd - rankStart;
14ce605777SToby Isaac   if (diff <= 0) PetscFunctionReturn(0);
15ce605777SToby Isaac   if (diff == 1) {
16ce605777SToby Isaac     if (forward) {
17ce605777SToby Isaac       ierr = PetscSortInt((PetscInt) n, keys);CHKERRQ(ierr);
18ce605777SToby Isaac     } else {
19ce605777SToby Isaac       ierr = PetscSortReverseInt((PetscInt) n, keys);CHKERRQ(ierr);
20ce605777SToby Isaac     }
21ce605777SToby Isaac     PetscFunctionReturn(0);
22ce605777SToby Isaac   }
23ce605777SToby Isaac   split = 1;
24ce605777SToby Isaac   while (2 * split < diff) split *= 2;
25ce605777SToby Isaac   mid = rankStart + split;
26ce605777SToby Isaac   if (rank < mid) {
27ce605777SToby Isaac     partner = rank + split;
28ce605777SToby Isaac   } else {
29ce605777SToby Isaac     partner = rank - split;
30ce605777SToby Isaac   }
31ce605777SToby Isaac   if (partner < rankEnd) {
32ce605777SToby Isaac     PetscMPIInt i;
33ce605777SToby Isaac 
34ffc4695bSBarry Smith     ierr = MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);CHKERRMPI(ierr);
35ce605777SToby Isaac     if ((rank < partner) == (forward == PETSC_TRUE)) {
36ce605777SToby Isaac       for (i = 0; i < n; i++) {
37ce605777SToby Isaac         keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
38ce605777SToby Isaac       }
39ce605777SToby Isaac     } else {
40ce605777SToby Isaac       for (i = 0; i < n; i++) {
41ce605777SToby Isaac         keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
42ce605777SToby Isaac       }
43ce605777SToby Isaac     }
44ce605777SToby Isaac   }
45ce605777SToby Isaac   /* divide and conquer */
46ce605777SToby Isaac   if (rank < mid) {
47ce605777SToby Isaac     ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);CHKERRQ(ierr);
48ce605777SToby Isaac   } else {
49ce605777SToby Isaac     ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
50ce605777SToby Isaac   }
51ce605777SToby Isaac   PetscFunctionReturn(0);
52ce605777SToby Isaac }
53ce605777SToby Isaac 
54ce605777SToby Isaac /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
55ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
56ce605777SToby Isaac {
57ce605777SToby Isaac   PetscInt       diff;
58ce605777SToby Isaac   PetscInt       mid;
59ce605777SToby Isaac   PetscErrorCode ierr;
60ce605777SToby Isaac 
61ce605777SToby Isaac   PetscFunctionBegin;
62ce605777SToby Isaac   diff = rankEnd - rankStart;
63ce605777SToby Isaac   if (diff <= 0) PetscFunctionReturn(0);
64ce605777SToby Isaac   if (diff == 1) {
65ce605777SToby Isaac     if (forward) {
66ce605777SToby Isaac       ierr = PetscSortInt(n, keys);CHKERRQ(ierr);
67ce605777SToby Isaac     } else {
68ce605777SToby Isaac       ierr = PetscSortReverseInt(n, keys);CHKERRQ(ierr);
69ce605777SToby Isaac     }
70ce605777SToby Isaac     PetscFunctionReturn(0);
71ce605777SToby Isaac   }
72ce605777SToby Isaac   mid = rankStart + diff / 2;
73ce605777SToby Isaac   /* divide and conquer */
74ce605777SToby Isaac   if (rank < mid) {
753af7d6b8SToby Isaac     ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool) !forward);CHKERRQ(ierr);
76ce605777SToby Isaac   } else {
77ce605777SToby Isaac     ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
78ce605777SToby Isaac   }
79ce605777SToby Isaac   /* bitonic merge */
80ce605777SToby Isaac   ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
81ce605777SToby Isaac   PetscFunctionReturn(0);
82ce605777SToby Isaac }
83ce605777SToby Isaac 
84ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
85ce605777SToby Isaac {
86ce605777SToby Isaac   PetscMPIInt size, rank, tag, mpin;
87ce605777SToby Isaac   PetscInt       *buffer;
88ce605777SToby Isaac   PetscErrorCode ierr;
89ce605777SToby Isaac 
90ce605777SToby Isaac   PetscFunctionBegin;
91ce605777SToby Isaac   PetscValidIntPointer(keys, 3);
92ce605777SToby Isaac   ierr = PetscCommGetNewTag(comm, &tag);CHKERRQ(ierr);
93ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
94ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
95ce605777SToby Isaac   ierr = PetscMPIIntCast(n, &mpin);CHKERRQ(ierr);
96ce605777SToby Isaac   ierr = PetscMalloc1(n, &buffer);CHKERRQ(ierr);
97ce605777SToby Isaac   ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);CHKERRQ(ierr);
98ce605777SToby Isaac   ierr = PetscFree(buffer);CHKERRQ(ierr);
99ce605777SToby Isaac   PetscFunctionReturn(0);
100ce605777SToby Isaac }
101ce605777SToby Isaac 
102ce605777SToby Isaac static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
103ce605777SToby Isaac {
104ce605777SToby Isaac   PetscMPIInt    size, rank;
105ce605777SToby Isaac   PetscInt       *pivots, *finalpivots, i;
106ce605777SToby Isaac   PetscInt       non_empty, my_first, count;
107ce605777SToby Isaac   PetscMPIInt    *keys_per, max_keys_per;
108ce605777SToby Isaac   PetscErrorCode ierr;
109ce605777SToby Isaac 
110ce605777SToby Isaac   PetscFunctionBegin;
111ffc4695bSBarry Smith   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRMPI(ierr);
112ffc4695bSBarry Smith   ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRMPI(ierr);
113ce605777SToby Isaac 
114ce605777SToby Isaac   /* Choose P - 1 pivots that would be ideal for the distribution on this process */
115ce605777SToby Isaac   ierr = PetscMalloc1(size - 1, &pivots);CHKERRQ(ierr);
116ce605777SToby Isaac   for (i = 0; i < size - 1; i++) {
117ce605777SToby Isaac     PetscInt index;
118ce605777SToby Isaac 
119ce605777SToby Isaac     if (!mapin->n) {
120ce605777SToby Isaac       /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
121ce605777SToby Isaac        * processes are in the layout, so those values will be ignored from the end of the sorted
122ce605777SToby Isaac        * pivots */
123ce605777SToby Isaac       pivots[i] = PETSC_MAX_INT;
124ce605777SToby Isaac     } else {
125ce605777SToby Isaac       /* match the distribution to the desired output described by mapout by getting the index
126ce605777SToby Isaac        * that is approximately the appropriate fraction through the list */
127ce605777SToby Isaac       index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N);
128ce605777SToby Isaac       index = PetscMin(index, (mapin->n - 1));
129ce605777SToby Isaac       index = PetscMax(index, 0);
130ce605777SToby Isaac       pivots[i] = keysin[index];
131ce605777SToby Isaac     }
132ce605777SToby Isaac   }
133ce605777SToby Isaac   /* sort the pivots in parallel */
134ce605777SToby Isaac   ierr = PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);CHKERRQ(ierr);
13576bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
136ce605777SToby Isaac     PetscBool sorted;
137ce605777SToby Isaac 
138ce605777SToby Isaac     ierr = PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);CHKERRQ(ierr);
1392758c9b9SBarry Smith     if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
140ce605777SToby Isaac   }
141ce605777SToby Isaac 
142ce605777SToby Isaac   /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
143ce605777SToby Isaac    * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
144ce605777SToby Isaac   non_empty = size;
145ce605777SToby Isaac   for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i+1]) non_empty--;
146ce605777SToby Isaac   ierr = PetscCalloc1(size + 1, &keys_per);CHKERRQ(ierr);
147ce605777SToby Isaac   my_first = -1;
148ce605777SToby Isaac   if (non_empty) {
149ce605777SToby Isaac     for (i = 0; i < size - 1; i++) {
150ce605777SToby Isaac       size_t sample = (i + 1) * non_empty - 1;
151ce605777SToby Isaac       size_t sample_rank = sample / (size - 1);
152ce605777SToby Isaac 
153ce605777SToby Isaac       keys_per[sample_rank]++;
1546b2dd37fSToby Isaac       if (my_first < 0 && (PetscMPIInt) sample_rank == rank) {
155ce605777SToby Isaac         my_first = (PetscInt) (sample - sample_rank * (size - 1));
156ce605777SToby Isaac       }
157ce605777SToby Isaac     }
158ce605777SToby Isaac   }
159ce605777SToby Isaac   for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
160ce605777SToby Isaac   ierr = PetscMalloc1(size * max_keys_per, &finalpivots);CHKERRQ(ierr);
161ce605777SToby Isaac   /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
162ce605777SToby Isaac    * and allgather them */
163ce605777SToby Isaac   for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
164ce605777SToby Isaac   for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
165*1e1ea65dSPierre Jolivet   ierr = MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm);CHKERRMPI(ierr);
166ce605777SToby Isaac   for (i = 0, count = 0; i < size; i++) {
167ce605777SToby Isaac     PetscInt j;
168ce605777SToby Isaac 
169ce605777SToby Isaac     for (j = 0; j < max_keys_per; j++) {
170ce605777SToby Isaac       if (j < keys_per[i]) {
171ce605777SToby Isaac         finalpivots[count++] = finalpivots[i * max_keys_per + j];
172ce605777SToby Isaac       }
173ce605777SToby Isaac     }
174ce605777SToby Isaac   }
175ce605777SToby Isaac   *outpivots = finalpivots;
176ce605777SToby Isaac   ierr = PetscFree(keys_per);CHKERRQ(ierr);
177ce605777SToby Isaac   ierr = PetscFree(pivots);CHKERRQ(ierr);
178ce605777SToby Isaac   PetscFunctionReturn(0);
179ce605777SToby Isaac }
180ce605777SToby Isaac 
181ce605777SToby Isaac static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
182ce605777SToby Isaac {
183ce605777SToby Isaac   PetscMPIInt    size, rank;
184ce605777SToby Isaac   PetscInt       myOffset, nextOffset;
185ce605777SToby Isaac   PetscInt       i;
186ce605777SToby Isaac   PetscMPIInt    total, filled;
187ce605777SToby Isaac   PetscMPIInt    sender, nfirst, nsecond;
188ce605777SToby Isaac   PetscMPIInt    firsttag, secondtag;
189ce605777SToby Isaac   MPI_Request    firstreqrcv;
190ce605777SToby Isaac   MPI_Request    *firstreqs;
191ce605777SToby Isaac   MPI_Request    *secondreqs;
192ce605777SToby Isaac   MPI_Status     firststatus;
193ce605777SToby Isaac 
194ce605777SToby Isaac   PetscErrorCode ierr;
195ce605777SToby Isaac 
196ce605777SToby Isaac   PetscFunctionBegin;
197ffc4695bSBarry Smith   ierr = MPI_Comm_size(map->comm, &size);CHKERRMPI(ierr);
198ffc4695bSBarry Smith   ierr = MPI_Comm_rank(map->comm, &rank);CHKERRMPI(ierr);
199ce605777SToby Isaac   ierr = PetscCommGetNewTag(map->comm, &firsttag);CHKERRQ(ierr);
200ce605777SToby Isaac   ierr = PetscCommGetNewTag(map->comm, &secondtag);CHKERRQ(ierr);
201ce605777SToby Isaac   myOffset = 0;
202ce605777SToby Isaac   ierr = PetscMalloc2(size, &firstreqs, size, &secondreqs);CHKERRQ(ierr);
203ffc4695bSBarry Smith   ierr = MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);CHKERRMPI(ierr);
204ce605777SToby Isaac   myOffset = nextOffset - n;
205ce605777SToby Isaac   total = map->range[rank + 1] - map->range[rank];
206ce605777SToby Isaac   if (total > 0) {
207ffc4695bSBarry Smith     ierr = MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);CHKERRMPI(ierr);
208ce605777SToby Isaac   }
209ce605777SToby Isaac   for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
210ce605777SToby Isaac     PetscInt itotal;
211ce605777SToby Isaac     PetscInt overlap, oStart, oEnd;
212ce605777SToby Isaac 
213ce605777SToby Isaac     itotal = map->range[i + 1] - map->range[i];
214ce605777SToby Isaac     if (itotal <= 0) continue;
215ce605777SToby Isaac     oStart = PetscMax(myOffset, map->range[i]);
216ce605777SToby Isaac     oEnd   = PetscMin(nextOffset, map->range[i + 1]);
217ce605777SToby Isaac     overlap = oEnd - oStart;
218ce605777SToby Isaac     if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
219ce605777SToby Isaac       /* send first message */
220ffc4695bSBarry Smith       ierr = MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));CHKERRMPI(ierr);
221ce605777SToby Isaac     } else if (overlap > 0) {
222ce605777SToby Isaac       /* send second message */
223ffc4695bSBarry Smith       ierr = MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRMPI(ierr);
224ce605777SToby Isaac     } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
225ce605777SToby Isaac       /* send empty second message */
226ffc4695bSBarry Smith       ierr = MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRMPI(ierr);
227ce605777SToby Isaac     }
228ce605777SToby Isaac   }
229ce605777SToby Isaac   filled = 0;
230ce605777SToby Isaac   sender = -1;
231ce605777SToby Isaac   if (total > 0) {
232ffc4695bSBarry Smith     ierr = MPI_Wait(&firstreqrcv, &firststatus);CHKERRMPI(ierr);
233ce605777SToby Isaac     sender = firststatus.MPI_SOURCE;
234ffc4695bSBarry Smith     ierr = MPI_Get_count(&firststatus, MPIU_INT, &filled);CHKERRMPI(ierr);
235ce605777SToby Isaac   }
236ce605777SToby Isaac   while (filled < total) {
237ce605777SToby Isaac     PetscMPIInt mfilled;
238ce605777SToby Isaac     MPI_Status stat;
239ce605777SToby Isaac 
240ce605777SToby Isaac     sender++;
241ffc4695bSBarry Smith     ierr = MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);CHKERRMPI(ierr);
242ffc4695bSBarry Smith     ierr = MPI_Get_count(&stat, MPIU_INT, &mfilled);CHKERRMPI(ierr);
243ce605777SToby Isaac     filled += mfilled;
244ce605777SToby Isaac   }
245ffc4695bSBarry Smith   ierr = MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
246ffc4695bSBarry Smith   ierr = MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
247ce605777SToby Isaac   ierr = PetscFree2(firstreqs, secondreqs);CHKERRQ(ierr);
248ce605777SToby Isaac   PetscFunctionReturn(0);
249ce605777SToby Isaac }
250ce605777SToby Isaac 
251ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
252ce605777SToby Isaac {
253ce605777SToby Isaac   PetscMPIInt    size, rank;
2546df2664eSToby Isaac   PetscInt       *pivots = NULL, *buffer;
255ce605777SToby Isaac   PetscInt       i, j;
256ce605777SToby Isaac   PetscMPIInt    *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;
257ce605777SToby Isaac   PetscErrorCode ierr;
258ce605777SToby Isaac 
259ce605777SToby Isaac   PetscFunctionBegin;
260ffc4695bSBarry Smith   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRMPI(ierr);
261ffc4695bSBarry Smith   ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRMPI(ierr);
262ce605777SToby Isaac   ierr = PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);CHKERRQ(ierr);
263ce605777SToby Isaac   /* sort locally */
264ce605777SToby Isaac   ierr = PetscSortInt(mapin->n, keysin);CHKERRQ(ierr);
265ce605777SToby Isaac   /* get P - 1 pivots */
266ce605777SToby Isaac   ierr = PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);CHKERRQ(ierr);
267ce605777SToby Isaac   /* determine which entries in the sorted array go to which other processes based on the pivots */
268ce605777SToby Isaac   for (i = 0, j = 0; i < size - 1; i++) {
269ce605777SToby Isaac     PetscInt prev = j;
270ce605777SToby Isaac 
271ce605777SToby Isaac     while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
272ce605777SToby Isaac     offsets_snd[i] = prev;
273ce605777SToby Isaac     keys_per_snd[i] = j - prev;
274ce605777SToby Isaac   }
275ce605777SToby Isaac   offsets_snd[size - 1] = j;
276ce605777SToby Isaac   keys_per_snd[size - 1] = mapin->n - j;
277ce605777SToby Isaac   offsets_snd[size] = mapin->n;
278ce605777SToby Isaac   /* get the incoming sizes */
279ffc4695bSBarry Smith   ierr = MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);CHKERRMPI(ierr);
280ce605777SToby Isaac   offsets_rcv[0] = 0;
281ce605777SToby Isaac   for (i = 0; i < size; i++) {
282ce605777SToby Isaac     offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i];
283ce605777SToby Isaac   }
284ce605777SToby Isaac   nrecv = offsets_rcv[size];
285ce605777SToby Isaac   /* all to all exchange */
286ce605777SToby Isaac   ierr = PetscMalloc1(nrecv, &buffer);CHKERRQ(ierr);
287ffc4695bSBarry Smith   ierr = MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);CHKERRMPI(ierr);
288ce605777SToby Isaac   ierr = PetscFree(pivots);CHKERRQ(ierr);
289ce605777SToby Isaac   ierr = PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);CHKERRQ(ierr);
290ce605777SToby Isaac 
291ce605777SToby Isaac   /* local sort */
292ce605777SToby Isaac   ierr = PetscSortInt(nrecv, buffer);CHKERRQ(ierr);
293ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
294ce605777SToby Isaac   {
295ce605777SToby Isaac     PetscBool sorted;
296ce605777SToby Isaac 
297ce605777SToby Isaac     ierr = PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);CHKERRQ(ierr);
2982758c9b9SBarry Smith     if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
299ce605777SToby Isaac   }
300ce605777SToby Isaac #endif
301ce605777SToby Isaac 
302ce605777SToby Isaac   /* redistribute to the desired order */
303ce605777SToby Isaac   ierr = PetscParallelRedistribute(mapout, nrecv, buffer, keysout);CHKERRQ(ierr);
304ce605777SToby Isaac   ierr = PetscFree(buffer);CHKERRQ(ierr);
305ce605777SToby Isaac   PetscFunctionReturn(0);
306ce605777SToby Isaac }
307ce605777SToby Isaac 
308ce605777SToby Isaac /*@
309064ec1f3Sprj-   PetscParallelSortInt - Globally sort a distributed array of integers
310ce605777SToby Isaac 
311ce605777SToby Isaac   Collective
312ce605777SToby Isaac 
313ce605777SToby Isaac   Input Parameters:
314ce605777SToby Isaac + mapin - PetscLayout describing the distribution of the input keys
315ce605777SToby Isaac . mapout - PetscLayout describing the distribution of the output keys
316ce605777SToby Isaac - keysin - the pre-sorted array of integers
317ce605777SToby Isaac 
318064ec1f3Sprj-   Output Parameter:
319ce605777SToby Isaac . keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.
320ce605777SToby Isaac 
321ce605777SToby Isaac   Level: developer
322ce605777SToby Isaac 
323ce605777SToby Isaac   Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:
324ce605777SToby Isaac 
325ce605777SToby Isaac   - sorts locally
326ce605777SToby Isaac   - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
327ce605777SToby Isaac   - using to the pivots to repartition the keys by all-to-all exchange
328ce605777SToby Isaac   - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
329ce605777SToby Isaac   - redistributing to match the mapout layout
330ce605777SToby Isaac 
331ce605777SToby Isaac   If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.
332ce605777SToby Isaac 
333ce605777SToby Isaac .seealso: PetscParallelSortedInt()
334ce605777SToby Isaac @*/
335ce605777SToby Isaac PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
336ce605777SToby Isaac {
337ce605777SToby Isaac   PetscMPIInt    size;
338ce605777SToby Isaac   PetscMPIInt    result;
339ce605777SToby Isaac   PetscInt       *keysincopy = NULL;
340ce605777SToby Isaac   PetscErrorCode ierr;
341ce605777SToby Isaac 
342ce605777SToby Isaac   PetscFunctionBegin;
343ce605777SToby Isaac   PetscValidPointer(mapin, 1);
344ce605777SToby Isaac   PetscValidPointer(mapout, 2);
345ffc4695bSBarry Smith   ierr = MPI_Comm_compare(mapin->comm, mapout->comm, &result);CHKERRMPI(ierr);
346ce605777SToby Isaac   if (result != MPI_IDENT && result != MPI_CONGRUENT) SETERRQ(mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
347ce605777SToby Isaac   ierr = PetscLayoutSetUp(mapin);CHKERRQ(ierr);
348ce605777SToby Isaac   ierr = PetscLayoutSetUp(mapout);CHKERRQ(ierr);
349ce605777SToby Isaac   if (mapin->n) PetscValidIntPointer(keysin, 3);
350ce605777SToby Isaac   if (mapout->n) PetscValidIntPointer(keysout, 4);
351ce605777SToby Isaac   if (mapin->N != mapout->N) SETERRQ2(mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%D != %D)", mapin->N, mapout->N);
352ffc4695bSBarry Smith   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRMPI(ierr);
353ce605777SToby Isaac   if (size == 1) {
354ce605777SToby Isaac     if (keysout != keysin) {
355ce605777SToby Isaac       ierr = PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr);
356ce605777SToby Isaac     }
357ce605777SToby Isaac     ierr = PetscSortInt(mapout->n, keysout);CHKERRQ(ierr);
358ce605777SToby Isaac     if (size == 1) PetscFunctionReturn(0);
359ce605777SToby Isaac   }
360ce605777SToby Isaac   if (keysout != keysin) {
361ce605777SToby Isaac     ierr = PetscMalloc1(mapin->n, &keysincopy);CHKERRQ(ierr);
362ce605777SToby Isaac     ierr = PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr);
363ce605777SToby Isaac     keysin = keysincopy;
364ce605777SToby Isaac   }
365ce605777SToby Isaac   ierr = PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);CHKERRQ(ierr);
366ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
367ce605777SToby Isaac   {
368ce605777SToby Isaac     PetscBool sorted;
369ce605777SToby Isaac 
370ce605777SToby Isaac     ierr = PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);CHKERRQ(ierr);
3712758c9b9SBarry Smith     if (!sorted) SETERRQ(mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
372ce605777SToby Isaac   }
373ce605777SToby Isaac #endif
374ce605777SToby Isaac   ierr = PetscFree(keysincopy);CHKERRQ(ierr);
375ce605777SToby Isaac   PetscFunctionReturn(0);
376ce605777SToby Isaac }
377