xref: /petsc/src/vec/is/utils/psort.c (revision ce60577764fdb6eb25584bc5af06f4b22e119d31)
1*ce605777SToby Isaac 
2*ce605777SToby Isaac #include <petsc/private/petscimpl.h>
3*ce605777SToby Isaac #include <petscis.h>
4*ce605777SToby Isaac 
5*ce605777SToby Isaac /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
6*ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
7*ce605777SToby Isaac {
8*ce605777SToby Isaac   PetscInt       diff;
9*ce605777SToby Isaac   PetscInt       split, mid, partner;
10*ce605777SToby Isaac   PetscErrorCode ierr;
11*ce605777SToby Isaac 
12*ce605777SToby Isaac   PetscFunctionBegin;
13*ce605777SToby Isaac   diff = rankEnd - rankStart;
14*ce605777SToby Isaac   if (diff <= 0) PetscFunctionReturn(0);
15*ce605777SToby Isaac   if (diff == 1) {
16*ce605777SToby Isaac     if (forward) {
17*ce605777SToby Isaac       ierr = PetscSortInt((PetscInt) n, keys);CHKERRQ(ierr);
18*ce605777SToby Isaac     } else {
19*ce605777SToby Isaac       ierr = PetscSortReverseInt((PetscInt) n, keys);CHKERRQ(ierr);
20*ce605777SToby Isaac     }
21*ce605777SToby Isaac     PetscFunctionReturn(0);
22*ce605777SToby Isaac   }
23*ce605777SToby Isaac   split = 1;
24*ce605777SToby Isaac   while (2 * split < diff) split *= 2;
25*ce605777SToby Isaac   mid = rankStart + split;
26*ce605777SToby Isaac   if (rank < mid) {
27*ce605777SToby Isaac     partner = rank + split;
28*ce605777SToby Isaac   } else {
29*ce605777SToby Isaac     partner = rank - split;
30*ce605777SToby Isaac   }
31*ce605777SToby Isaac   if (partner < rankEnd) {
32*ce605777SToby Isaac     PetscMPIInt i;
33*ce605777SToby Isaac 
34*ce605777SToby Isaac     ierr = MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);CHKERRQ(ierr);
35*ce605777SToby Isaac     if ((rank < partner) == (forward == PETSC_TRUE)) {
36*ce605777SToby Isaac       for (i = 0; i < n; i++) {
37*ce605777SToby Isaac         keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
38*ce605777SToby Isaac       }
39*ce605777SToby Isaac     } else {
40*ce605777SToby Isaac       for (i = 0; i < n; i++) {
41*ce605777SToby Isaac         keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
42*ce605777SToby Isaac       }
43*ce605777SToby Isaac     }
44*ce605777SToby Isaac   }
45*ce605777SToby Isaac   /* divide and conquer */
46*ce605777SToby Isaac   if (rank < mid) {
47*ce605777SToby Isaac     ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);CHKERRQ(ierr);
48*ce605777SToby Isaac   } else {
49*ce605777SToby Isaac     ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
50*ce605777SToby Isaac   }
51*ce605777SToby Isaac   PetscFunctionReturn(0);
52*ce605777SToby Isaac }
53*ce605777SToby Isaac 
54*ce605777SToby Isaac /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
55*ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
56*ce605777SToby Isaac {
57*ce605777SToby Isaac   PetscInt       diff;
58*ce605777SToby Isaac   PetscInt       mid;
59*ce605777SToby Isaac   PetscErrorCode ierr;
60*ce605777SToby Isaac 
61*ce605777SToby Isaac   PetscFunctionBegin;
62*ce605777SToby Isaac   diff = rankEnd - rankStart;
63*ce605777SToby Isaac   if (diff <= 0) PetscFunctionReturn(0);
64*ce605777SToby Isaac   if (diff == 1) {
65*ce605777SToby Isaac     if (forward) {
66*ce605777SToby Isaac       ierr = PetscSortInt(n, keys);CHKERRQ(ierr);
67*ce605777SToby Isaac     } else {
68*ce605777SToby Isaac       ierr = PetscSortReverseInt(n, keys);CHKERRQ(ierr);
69*ce605777SToby Isaac     }
70*ce605777SToby Isaac     PetscFunctionReturn(0);
71*ce605777SToby Isaac   }
72*ce605777SToby Isaac   mid = rankStart + diff / 2;
73*ce605777SToby Isaac   /* divide and conquer */
74*ce605777SToby Isaac   if (rank < mid) {
75*ce605777SToby Isaac     ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, !forward);CHKERRQ(ierr);
76*ce605777SToby Isaac   } else {
77*ce605777SToby Isaac     ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
78*ce605777SToby Isaac   }
79*ce605777SToby Isaac   /* bitonic merge */
80*ce605777SToby Isaac   ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
81*ce605777SToby Isaac   PetscFunctionReturn(0);
82*ce605777SToby Isaac }
83*ce605777SToby Isaac 
84*ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
85*ce605777SToby Isaac {
86*ce605777SToby Isaac   PetscMPIInt size, rank, tag, mpin;
87*ce605777SToby Isaac   PetscInt       *buffer;
88*ce605777SToby Isaac   PetscErrorCode ierr;
89*ce605777SToby Isaac 
90*ce605777SToby Isaac   PetscFunctionBegin;
91*ce605777SToby Isaac   PetscValidIntPointer(keys, 3);
92*ce605777SToby Isaac   ierr = PetscCommGetNewTag(comm, &tag);CHKERRQ(ierr);
93*ce605777SToby Isaac   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
94*ce605777SToby Isaac   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
95*ce605777SToby Isaac   ierr = PetscMPIIntCast(n, &mpin);CHKERRQ(ierr);
96*ce605777SToby Isaac   ierr = PetscMalloc1(n, &buffer);CHKERRQ(ierr);
97*ce605777SToby Isaac   ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);CHKERRQ(ierr);
98*ce605777SToby Isaac   ierr = PetscFree(buffer);CHKERRQ(ierr);
99*ce605777SToby Isaac   PetscFunctionReturn(0);
100*ce605777SToby Isaac }
101*ce605777SToby Isaac 
102*ce605777SToby Isaac static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
103*ce605777SToby Isaac {
104*ce605777SToby Isaac   PetscMPIInt    size, rank;
105*ce605777SToby Isaac   PetscInt       *pivots, *finalpivots, i;
106*ce605777SToby Isaac   PetscInt       non_empty, my_first, count;
107*ce605777SToby Isaac   PetscMPIInt    *keys_per, max_keys_per;
108*ce605777SToby Isaac   PetscErrorCode ierr;
109*ce605777SToby Isaac 
110*ce605777SToby Isaac   PetscFunctionBegin;
111*ce605777SToby Isaac   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
112*ce605777SToby Isaac   ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRQ(ierr);
113*ce605777SToby Isaac 
114*ce605777SToby Isaac   /* Choose P - 1 pivots that would be ideal for the distribution on this process */
115*ce605777SToby Isaac   ierr = PetscMalloc1(size - 1, &pivots);CHKERRQ(ierr);
116*ce605777SToby Isaac   for (i = 0; i < size - 1; i++) {
117*ce605777SToby Isaac     PetscInt index;
118*ce605777SToby Isaac 
119*ce605777SToby Isaac     if (!mapin->n) {
120*ce605777SToby Isaac       /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
121*ce605777SToby Isaac        * processes are in the layout, so those values will be ignored from the end of the sorted
122*ce605777SToby Isaac        * pivots */
123*ce605777SToby Isaac       pivots[i] = PETSC_MAX_INT;
124*ce605777SToby Isaac     } else {
125*ce605777SToby Isaac       /* match the distribution to the desired output described by mapout by getting the index
126*ce605777SToby Isaac        * that is approximately the appropriate fraction through the list */
127*ce605777SToby Isaac       index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N);
128*ce605777SToby Isaac       index = PetscMin(index, (mapin->n - 1));
129*ce605777SToby Isaac       index = PetscMax(index, 0);
130*ce605777SToby Isaac       pivots[i] = keysin[index];
131*ce605777SToby Isaac     }
132*ce605777SToby Isaac   }
133*ce605777SToby Isaac   /* sort the pivots in parallel */
134*ce605777SToby Isaac   ierr = PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);CHKERRQ(ierr);
135*ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
136*ce605777SToby Isaac   {
137*ce605777SToby Isaac     PetscBool sorted;
138*ce605777SToby Isaac 
139*ce605777SToby Isaac     ierr = PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);CHKERRQ(ierr);
140*ce605777SToby Isaac     if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");CHKERRQ(ierr);
141*ce605777SToby Isaac   }
142*ce605777SToby Isaac #endif
143*ce605777SToby Isaac 
144*ce605777SToby Isaac   /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
145*ce605777SToby Isaac    * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
146*ce605777SToby Isaac   non_empty = size;
147*ce605777SToby Isaac   for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i+1]) non_empty--;
148*ce605777SToby Isaac   ierr = PetscCalloc1(size + 1, &keys_per);CHKERRQ(ierr);
149*ce605777SToby Isaac   my_first = -1;
150*ce605777SToby Isaac   if (non_empty) {
151*ce605777SToby Isaac     for (i = 0; i < size - 1; i++) {
152*ce605777SToby Isaac       size_t sample = (i + 1) * non_empty - 1;
153*ce605777SToby Isaac       size_t sample_rank = sample / (size - 1);
154*ce605777SToby Isaac 
155*ce605777SToby Isaac       keys_per[sample_rank]++;
156*ce605777SToby Isaac       if (my_first < 0 && sample_rank == rank) {
157*ce605777SToby Isaac         my_first = (PetscInt) (sample - sample_rank * (size - 1));
158*ce605777SToby Isaac       }
159*ce605777SToby Isaac     }
160*ce605777SToby Isaac   }
161*ce605777SToby Isaac   for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
162*ce605777SToby Isaac   ierr = PetscMalloc1(size * max_keys_per, &finalpivots);CHKERRQ(ierr);
163*ce605777SToby Isaac   /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
164*ce605777SToby Isaac    * and allgather them */
165*ce605777SToby Isaac   for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
166*ce605777SToby Isaac   for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
167*ce605777SToby Isaac   ierr = MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm);
168*ce605777SToby Isaac   for (i = 0, count = 0; i < size; i++) {
169*ce605777SToby Isaac     PetscInt j;
170*ce605777SToby Isaac 
171*ce605777SToby Isaac     for (j = 0; j < max_keys_per; j++) {
172*ce605777SToby Isaac       if (j < keys_per[i]) {
173*ce605777SToby Isaac         finalpivots[count++] = finalpivots[i * max_keys_per + j];
174*ce605777SToby Isaac       }
175*ce605777SToby Isaac     }
176*ce605777SToby Isaac   }
177*ce605777SToby Isaac   *outpivots = finalpivots;
178*ce605777SToby Isaac   ierr = PetscFree(keys_per);CHKERRQ(ierr);
179*ce605777SToby Isaac   ierr = PetscFree(pivots);CHKERRQ(ierr);
180*ce605777SToby Isaac   PetscFunctionReturn(0);
181*ce605777SToby Isaac }
182*ce605777SToby Isaac 
/* Redistribute a distributed array so the output matches the layout map.
 * arrayin holds n entries on this process; the current global distribution is
 * implied by an MPI_Scan over the local counts n.  arrayout receives the
 * entries belonging to this rank under map.
 *
 * Protocol: each rank with a nonempty output range posts one wildcard receive
 * (firsttag) for its whole range; the unique rank whose current interval
 * contains the first entry of that range answers it with as much of the range
 * as it holds.  Any remainder arrives from consecutively increasing ranks
 * under secondtag (including zero-length messages from empty ranks sitting
 * inside the range, so the consecutive-sender sequence is never broken). */
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt    size, rank;
  PetscInt       myOffset, nextOffset;  /* this rank currently holds global entries [myOffset, nextOffset) */
  PetscInt       i;
  PetscMPIInt    total, filled;         /* entries wanted here under map / entries received so far */
  PetscMPIInt    sender, nfirst, nsecond;
  PetscMPIInt    firsttag, secondtag;
  MPI_Request    firstreqrcv;
  MPI_Request    *firstreqs;
  MPI_Request    *secondreqs;
  MPI_Status     firststatus;

  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(map->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(map->comm, &rank);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(map->comm, &firsttag);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(map->comm, &secondtag);CHKERRQ(ierr);
  myOffset = 0;
  ierr = PetscMalloc2(size, &firstreqs, size, &secondreqs);CHKERRQ(ierr);
  /* inclusive prefix sum of local counts; subtracting n yields the exclusive offset */
  ierr = MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);CHKERRQ(ierr);
  myOffset = nextOffset - n;
  total = map->range[rank + 1] - map->range[rank];
  if (total > 0) {
    /* post the single wildcard receive for this rank's entire output range */
    ierr = MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);CHKERRQ(ierr);
  }
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue; /* rank i receives nothing under map */
    /* overlap of this rank's current interval with rank i's output range */
    oStart = PetscMax(myOffset, map->range[i]);
    oEnd   = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      ierr = MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));CHKERRQ(ierr);
    } else if (overlap > 0) {
      /* send second message */
      ierr = MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRQ(ierr);
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      ierr = MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRQ(ierr);
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    /* wait for the first message; its status tells us who sent it and how much arrived */
    ierr = MPI_Wait(&firstreqrcv, &firststatus);CHKERRQ(ierr);
    sender = firststatus.MPI_SOURCE;
    ierr = MPI_Get_count(&firststatus, MPIU_INT, &filled);CHKERRQ(ierr);
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status stat;

    /* remaining data comes from consecutively higher ranks under secondtag */
    sender++;
    ierr = MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);CHKERRQ(ierr);
    ierr = MPI_Get_count(&stat, MPIU_INT, &mfilled);CHKERRQ(ierr);
    filled += mfilled;
  }
  /* complete all outgoing sends before releasing the request arrays */
  ierr = MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree2(firstreqs, secondreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
252*ce605777SToby Isaac 
253*ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
254*ce605777SToby Isaac {
255*ce605777SToby Isaac   PetscMPIInt    size, rank;
256*ce605777SToby Isaac   PetscInt       *pivots, *buffer;
257*ce605777SToby Isaac   PetscInt       i, j;
258*ce605777SToby Isaac   PetscMPIInt    *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;
259*ce605777SToby Isaac   PetscErrorCode ierr;
260*ce605777SToby Isaac 
261*ce605777SToby Isaac   PetscFunctionBegin;
262*ce605777SToby Isaac   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
263*ce605777SToby Isaac   ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRQ(ierr);
264*ce605777SToby Isaac   ierr = PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);CHKERRQ(ierr);
265*ce605777SToby Isaac   /* sort locally */
266*ce605777SToby Isaac   ierr = PetscSortInt(mapin->n, keysin);CHKERRQ(ierr);
267*ce605777SToby Isaac   /* get P - 1 pivots */
268*ce605777SToby Isaac   ierr = PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);CHKERRQ(ierr);
269*ce605777SToby Isaac   /* determine which entries in the sorted array go to which other processes based on the pivots */
270*ce605777SToby Isaac   for (i = 0, j = 0; i < size - 1; i++) {
271*ce605777SToby Isaac     PetscInt prev = j;
272*ce605777SToby Isaac 
273*ce605777SToby Isaac     while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
274*ce605777SToby Isaac     offsets_snd[i] = prev;
275*ce605777SToby Isaac     keys_per_snd[i] = j - prev;
276*ce605777SToby Isaac   }
277*ce605777SToby Isaac   offsets_snd[size - 1] = j;
278*ce605777SToby Isaac   keys_per_snd[size - 1] = mapin->n - j;
279*ce605777SToby Isaac   offsets_snd[size] = mapin->n;
280*ce605777SToby Isaac   /* get the incoming sizes */
281*ce605777SToby Isaac   ierr = MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);CHKERRQ(ierr);
282*ce605777SToby Isaac   offsets_rcv[0] = 0;
283*ce605777SToby Isaac   for (i = 0; i < size; i++) {
284*ce605777SToby Isaac     offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i];
285*ce605777SToby Isaac   }
286*ce605777SToby Isaac   nrecv = offsets_rcv[size];
287*ce605777SToby Isaac   /* all to all exchange */
288*ce605777SToby Isaac   ierr = PetscMalloc1(nrecv, &buffer);CHKERRQ(ierr);
289*ce605777SToby Isaac   ierr = MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);CHKERRQ(ierr);
290*ce605777SToby Isaac   ierr = PetscFree(pivots);CHKERRQ(ierr);
291*ce605777SToby Isaac   ierr = PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);CHKERRQ(ierr);
292*ce605777SToby Isaac 
293*ce605777SToby Isaac   /* local sort */
294*ce605777SToby Isaac   ierr = PetscSortInt(nrecv, buffer);CHKERRQ(ierr);
295*ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
296*ce605777SToby Isaac   {
297*ce605777SToby Isaac     PetscBool sorted;
298*ce605777SToby Isaac 
299*ce605777SToby Isaac     ierr = PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);CHKERRQ(ierr);
300*ce605777SToby Isaac     if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");CHKERRQ(ierr);
301*ce605777SToby Isaac   }
302*ce605777SToby Isaac #endif
303*ce605777SToby Isaac 
304*ce605777SToby Isaac   /* redistribute to the desired order */
305*ce605777SToby Isaac   ierr = PetscParallelRedistribute(mapout, nrecv, buffer, keysout);CHKERRQ(ierr);
306*ce605777SToby Isaac   ierr = PetscFree(buffer);CHKERRQ(ierr);
307*ce605777SToby Isaac   PetscFunctionReturn(0);
308*ce605777SToby Isaac }
309*ce605777SToby Isaac 
310*ce605777SToby Isaac /*@
311*ce605777SToby Isaac   PetscParallelSortInt - Globally sort a distrbuted array of integers
312*ce605777SToby Isaac 
313*ce605777SToby Isaac   Collective
314*ce605777SToby Isaac 
315*ce605777SToby Isaac   Input Parameters:
316*ce605777SToby Isaac + mapin - PetscLayout describing the distribution of the input keys
317*ce605777SToby Isaac . mapout - PetscLayout describing the distribution of the output keys
318*ce605777SToby Isaac - keysin - the pre-sorted array of integers
319*ce605777SToby Isaac 
320*ce605777SToby Isaac   Output Parameters:
321*ce605777SToby Isaac . keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.
322*ce605777SToby Isaac 
323*ce605777SToby Isaac   Level: developer
324*ce605777SToby Isaac 
325*ce605777SToby Isaac   Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:
326*ce605777SToby Isaac 
327*ce605777SToby Isaac   - sorts locally
328*ce605777SToby Isaac   - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
329*ce605777SToby Isaac   - using to the pivots to repartition the keys by all-to-all exchange
330*ce605777SToby Isaac   - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
331*ce605777SToby Isaac   - redistributing to match the mapout layout
332*ce605777SToby Isaac 
333*ce605777SToby Isaac   If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.
334*ce605777SToby Isaac 
335*ce605777SToby Isaac .seealso: PetscParallelSortedInt()
336*ce605777SToby Isaac @*/
337*ce605777SToby Isaac PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
338*ce605777SToby Isaac {
339*ce605777SToby Isaac   PetscMPIInt    size;
340*ce605777SToby Isaac   PetscMPIInt    result;
341*ce605777SToby Isaac   PetscInt       *keysincopy = NULL;
342*ce605777SToby Isaac   PetscErrorCode ierr;
343*ce605777SToby Isaac 
344*ce605777SToby Isaac   PetscFunctionBegin;
345*ce605777SToby Isaac   PetscValidPointer(mapin, 1);
346*ce605777SToby Isaac   PetscValidPointer(mapout, 2);
347*ce605777SToby Isaac   ierr = MPI_Comm_compare(mapin->comm, mapout->comm, &result);CHKERRQ(ierr);
348*ce605777SToby Isaac   if (result != MPI_IDENT && result != MPI_CONGRUENT) SETERRQ(mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
349*ce605777SToby Isaac   ierr = PetscLayoutSetUp(mapin);CHKERRQ(ierr);
350*ce605777SToby Isaac   ierr = PetscLayoutSetUp(mapout);CHKERRQ(ierr);
351*ce605777SToby Isaac   if (mapin->n) PetscValidIntPointer(keysin, 3);
352*ce605777SToby Isaac   if (mapout->n) PetscValidIntPointer(keysout, 4);
353*ce605777SToby Isaac   if (mapin->N != mapout->N) SETERRQ2(mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%D != %D)", mapin->N, mapout->N);
354*ce605777SToby Isaac   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
355*ce605777SToby Isaac   if (size == 1) {
356*ce605777SToby Isaac     if (keysout != keysin) {
357*ce605777SToby Isaac       ierr = PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr);
358*ce605777SToby Isaac     }
359*ce605777SToby Isaac     ierr = PetscSortInt(mapout->n, keysout);CHKERRQ(ierr);
360*ce605777SToby Isaac     if (size == 1) PetscFunctionReturn(0);
361*ce605777SToby Isaac   }
362*ce605777SToby Isaac   if (keysout != keysin) {
363*ce605777SToby Isaac     ierr = PetscMalloc1(mapin->n, &keysincopy);CHKERRQ(ierr);
364*ce605777SToby Isaac     ierr = PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr);
365*ce605777SToby Isaac     keysin = keysincopy;
366*ce605777SToby Isaac   }
367*ce605777SToby Isaac   ierr = PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);CHKERRQ(ierr);
368*ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
369*ce605777SToby Isaac   {
370*ce605777SToby Isaac     PetscBool sorted;
371*ce605777SToby Isaac 
372*ce605777SToby Isaac     ierr = PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);CHKERRQ(ierr);
373*ce605777SToby Isaac     if (!sorted) SETERRQ(mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");CHKERRQ(ierr);
374*ce605777SToby Isaac   }
375*ce605777SToby Isaac #endif
376*ce605777SToby Isaac   ierr = PetscFree(keysincopy);CHKERRQ(ierr);
377*ce605777SToby Isaac   PetscFunctionReturn(0);
378*ce605777SToby Isaac }
379