1ce605777SToby Isaac 2ce605777SToby Isaac #include <petsc/private/petscimpl.h> 3579e05a6SToby Isaac #include <petscis.h> /*I "petscis.h" I*/ 4ce605777SToby Isaac 5ce605777SToby Isaac /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 6ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 7ce605777SToby Isaac { 8ce605777SToby Isaac PetscInt diff; 9ce605777SToby Isaac PetscInt split, mid, partner; 10ce605777SToby Isaac 11ce605777SToby Isaac PetscFunctionBegin; 12ce605777SToby Isaac diff = rankEnd - rankStart; 13ce605777SToby Isaac if (diff <= 0) PetscFunctionReturn(0); 14ce605777SToby Isaac if (diff == 1) { 15ce605777SToby Isaac if (forward) { 165f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortInt((PetscInt) n, keys)); 17ce605777SToby Isaac } else { 185f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortReverseInt((PetscInt) n, keys)); 19ce605777SToby Isaac } 20ce605777SToby Isaac PetscFunctionReturn(0); 21ce605777SToby Isaac } 22ce605777SToby Isaac split = 1; 23ce605777SToby Isaac while (2 * split < diff) split *= 2; 24ce605777SToby Isaac mid = rankStart + split; 25ce605777SToby Isaac if (rank < mid) { 26ce605777SToby Isaac partner = rank + split; 27ce605777SToby Isaac } else { 28ce605777SToby Isaac partner = rank - split; 29ce605777SToby Isaac } 30ce605777SToby Isaac if (partner < rankEnd) { 31ce605777SToby Isaac PetscMPIInt i; 32ce605777SToby Isaac 335f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE)); 34ce605777SToby Isaac if ((rank < partner) == (forward == PETSC_TRUE)) { 35ce605777SToby Isaac for (i = 0; i < n; i++) { 36ce605777SToby Isaac keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i]; 37ce605777SToby Isaac } 38ce605777SToby Isaac } else { 39ce605777SToby Isaac for (i = 0; i < n; i++) { 40ce605777SToby Isaac keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i]; 41ce605777SToby Isaac } 42ce605777SToby Isaac } 43ce605777SToby Isaac } 44ce605777SToby Isaac /* divide and conquer */ 45ce605777SToby Isaac if (rank < mid) { 465f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward)); 47ce605777SToby Isaac } else { 485f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 49ce605777SToby Isaac } 50ce605777SToby Isaac PetscFunctionReturn(0); 51ce605777SToby Isaac } 52ce605777SToby Isaac 53ce605777SToby Isaac /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 54ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 55ce605777SToby Isaac { 56ce605777SToby Isaac PetscInt diff; 57ce605777SToby Isaac PetscInt mid; 58ce605777SToby Isaac 59ce605777SToby Isaac PetscFunctionBegin; 60ce605777SToby Isaac diff = rankEnd - rankStart; 61ce605777SToby Isaac if (diff <= 0) PetscFunctionReturn(0); 62ce605777SToby Isaac if (diff == 1) { 63ce605777SToby Isaac if (forward) { 645f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortInt(n, keys)); 65ce605777SToby Isaac } else { 665f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortReverseInt(n, keys)); 67ce605777SToby Isaac } 68ce605777SToby Isaac PetscFunctionReturn(0); 69ce605777SToby Isaac } 70ce605777SToby Isaac mid = rankStart + diff / 2; 71ce605777SToby Isaac /* divide and conquer */ 72ce605777SToby Isaac if (rank < mid) { 735f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool) !forward)); 74ce605777SToby Isaac } else { 755f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 76ce605777SToby Isaac } 77ce605777SToby Isaac /* bitonic merge */ 785f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward)); 79ce605777SToby Isaac PetscFunctionReturn(0); 80ce605777SToby Isaac } 81ce605777SToby Isaac 82ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[]) 83ce605777SToby Isaac { 84ce605777SToby Isaac PetscMPIInt size, rank, tag, mpin; 85ce605777SToby Isaac PetscInt *buffer; 86ce605777SToby Isaac 87ce605777SToby Isaac PetscFunctionBegin; 88ce605777SToby Isaac PetscValidIntPointer(keys, 3); 895f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommGetNewTag(comm, &tag)); 905f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(comm, &size)); 915f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(comm, &rank)); 925f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMPIIntCast(n, &mpin)); 935f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(n, &buffer)); 945f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE)); 955f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(buffer)); 96ce605777SToby Isaac PetscFunctionReturn(0); 97ce605777SToby Isaac } 98ce605777SToby Isaac 99ce605777SToby Isaac static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[]) 100ce605777SToby Isaac { 101ce605777SToby Isaac PetscMPIInt size, rank; 102ce605777SToby Isaac PetscInt *pivots, *finalpivots, i; 103ce605777SToby Isaac PetscInt non_empty, my_first, count; 104ce605777SToby Isaac PetscMPIInt *keys_per, max_keys_per; 105ce605777SToby Isaac 106ce605777SToby Isaac PetscFunctionBegin; 1075f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(mapin->comm, &size)); 1085f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(mapin->comm, &rank)); 109ce605777SToby Isaac 110ce605777SToby Isaac /* Choose P - 1 pivots that would be ideal for the distribution on this process */ 1115f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(size - 1, &pivots)); 112ce605777SToby Isaac for (i = 0; i < size - 1; i++) { 113ce605777SToby Isaac PetscInt index; 114ce605777SToby Isaac 115ce605777SToby Isaac if (!mapin->n) { 116ce605777SToby Isaac /* if this rank is empty, put "infinity" in the list. each process knows how many empty 117ce605777SToby Isaac * processes are in the layout, so those values will be ignored from the end of the sorted 118ce605777SToby Isaac * pivots */ 119ce605777SToby Isaac pivots[i] = PETSC_MAX_INT; 120ce605777SToby Isaac } else { 121ce605777SToby Isaac /* match the distribution to the desired output described by mapout by getting the index 122ce605777SToby Isaac * that is approximately the appropriate fraction through the list */ 123ce605777SToby Isaac index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N); 124ce605777SToby Isaac index = PetscMin(index, (mapin->n - 1)); 125ce605777SToby Isaac index = PetscMax(index, 0); 126ce605777SToby Isaac pivots[i] = keysin[index]; 127ce605777SToby Isaac } 128ce605777SToby Isaac } 129ce605777SToby Isaac /* sort the pivots in parallel */ 1305f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots)); 13176bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 132ce605777SToby Isaac PetscBool sorted; 133ce605777SToby Isaac 1345f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted)); 135*28b400f6SJacob Faibussowitsch PetscCheck(sorted,mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed"); 136ce605777SToby Isaac } 137ce605777SToby Isaac 138ce605777SToby Isaac /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select 139ce605777SToby Isaac * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */ 140ce605777SToby Isaac non_empty = size; 141ce605777SToby Isaac for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i+1]) non_empty--; 1425f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(size + 1, &keys_per)); 143ce605777SToby Isaac my_first = -1; 144ce605777SToby Isaac if (non_empty) { 145ce605777SToby Isaac for (i = 0; i < size - 1; i++) { 146ce605777SToby Isaac size_t sample = (i + 1) * non_empty - 1; 147ce605777SToby Isaac size_t sample_rank = sample / (size - 1); 148ce605777SToby Isaac 149ce605777SToby Isaac keys_per[sample_rank]++; 1506b2dd37fSToby Isaac if (my_first < 0 && (PetscMPIInt) sample_rank == rank) { 151ce605777SToby Isaac my_first = (PetscInt) (sample - sample_rank * (size - 1)); 152ce605777SToby Isaac } 153ce605777SToby Isaac } 154ce605777SToby Isaac } 155ce605777SToby Isaac for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per); 1565f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(size * max_keys_per, &finalpivots)); 157ce605777SToby Isaac /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array 158ce605777SToby Isaac * and allgather them */ 159ce605777SToby Isaac for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty]; 160ce605777SToby Isaac for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT; 1615f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm)); 162ce605777SToby Isaac for (i = 0, count = 0; i < size; i++) { 163ce605777SToby Isaac PetscInt j; 164ce605777SToby Isaac 165ce605777SToby Isaac for (j = 0; j < max_keys_per; j++) { 166ce605777SToby Isaac if (j < keys_per[i]) { 167ce605777SToby Isaac finalpivots[count++] = finalpivots[i * max_keys_per + j]; 168ce605777SToby Isaac } 169ce605777SToby Isaac } 170ce605777SToby Isaac } 171ce605777SToby Isaac *outpivots = finalpivots; 1725f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(keys_per)); 1735f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(pivots)); 174ce605777SToby Isaac PetscFunctionReturn(0); 175ce605777SToby Isaac } 176ce605777SToby Isaac 177ce605777SToby Isaac static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[]) 178ce605777SToby Isaac { 179ce605777SToby Isaac PetscMPIInt size, rank; 180ce605777SToby Isaac PetscInt myOffset, nextOffset; 181ce605777SToby Isaac PetscInt i; 182ce605777SToby Isaac PetscMPIInt total, filled; 183ce605777SToby Isaac PetscMPIInt sender, nfirst, nsecond; 184ce605777SToby Isaac PetscMPIInt firsttag, secondtag; 185ce605777SToby Isaac MPI_Request firstreqrcv; 186ce605777SToby Isaac MPI_Request *firstreqs; 187ce605777SToby Isaac MPI_Request *secondreqs; 188ce605777SToby Isaac MPI_Status firststatus; 189ce605777SToby Isaac 190ce605777SToby Isaac PetscFunctionBegin; 1915f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(map->comm, &size)); 1925f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(map->comm, &rank)); 1935f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommGetNewTag(map->comm, &firsttag)); 1945f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommGetNewTag(map->comm, &secondtag)); 195ce605777SToby Isaac myOffset = 0; 1965f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(size, &firstreqs, size, &secondreqs)); 1975f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm)); 198ce605777SToby Isaac myOffset = nextOffset - n; 199ce605777SToby Isaac total = map->range[rank + 1] - map->range[rank]; 200ce605777SToby Isaac if (total > 0) { 2015f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv)); 202ce605777SToby Isaac } 203ce605777SToby Isaac for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) { 204ce605777SToby Isaac PetscInt itotal; 205ce605777SToby Isaac PetscInt overlap, oStart, oEnd; 206ce605777SToby Isaac 207ce605777SToby Isaac itotal = map->range[i + 1] - map->range[i]; 208ce605777SToby Isaac if (itotal <= 0) continue; 209ce605777SToby Isaac oStart = PetscMax(myOffset, map->range[i]); 210ce605777SToby Isaac oEnd = PetscMin(nextOffset, map->range[i + 1]); 211ce605777SToby Isaac overlap = oEnd - oStart; 212ce605777SToby Isaac if (map->range[i] >= myOffset && map->range[i] < nextOffset) { 213ce605777SToby Isaac /* send first message */ 2145f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]))); 215ce605777SToby Isaac } else if (overlap > 0) { 216ce605777SToby Isaac /* send second message */ 2175f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]))); 218ce605777SToby Isaac } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) { 219ce605777SToby Isaac /* send empty second message */ 2205f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]))); 221ce605777SToby Isaac } 222ce605777SToby Isaac } 223ce605777SToby Isaac filled = 0; 224ce605777SToby Isaac sender = -1; 225ce605777SToby Isaac if (total > 0) { 2265f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Wait(&firstreqrcv, &firststatus)); 227ce605777SToby Isaac sender = firststatus.MPI_SOURCE; 2285f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled)); 229ce605777SToby Isaac } 230ce605777SToby Isaac while (filled < total) { 231ce605777SToby Isaac PetscMPIInt mfilled; 232ce605777SToby Isaac MPI_Status stat; 233ce605777SToby Isaac 234ce605777SToby Isaac sender++; 2355f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat)); 2365f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled)); 237ce605777SToby Isaac filled += mfilled; 238ce605777SToby Isaac } 2395f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE)); 2405f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE)); 2415f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(firstreqs, secondreqs)); 242ce605777SToby Isaac PetscFunctionReturn(0); 243ce605777SToby Isaac } 244ce605777SToby Isaac 245ce605777SToby Isaac static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 246ce605777SToby Isaac { 247ce605777SToby Isaac PetscMPIInt size, rank; 2486df2664eSToby Isaac PetscInt *pivots = NULL, *buffer; 249ce605777SToby Isaac PetscInt i, j; 250ce605777SToby Isaac PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv; 251ce605777SToby Isaac 252ce605777SToby Isaac PetscFunctionBegin; 2535f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(mapin->comm, &size)); 2545f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(mapin->comm, &rank)); 2555f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv)); 256ce605777SToby Isaac /* sort locally */ 2575f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortInt(mapin->n, keysin)); 258ce605777SToby Isaac /* get P - 1 pivots */ 2595f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots)); 260ce605777SToby Isaac /* determine which entries in the sorted array go to which other processes based on the pivots */ 261ce605777SToby Isaac for (i = 0, j = 0; i < size - 1; i++) { 262ce605777SToby Isaac PetscInt prev = j; 263ce605777SToby Isaac 264ce605777SToby Isaac while ((j < mapin->n) && (keysin[j] < pivots[i])) j++; 265ce605777SToby Isaac offsets_snd[i] = prev; 266ce605777SToby Isaac keys_per_snd[i] = j - prev; 267ce605777SToby Isaac } 268ce605777SToby Isaac offsets_snd[size - 1] = j; 269ce605777SToby Isaac keys_per_snd[size - 1] = mapin->n - j; 270ce605777SToby Isaac offsets_snd[size] = mapin->n; 271ce605777SToby Isaac /* get the incoming sizes */ 2725f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm)); 273ce605777SToby Isaac offsets_rcv[0] = 0; 274ce605777SToby Isaac for (i = 0; i < size; i++) { 275ce605777SToby Isaac offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i]; 276ce605777SToby Isaac } 277ce605777SToby Isaac nrecv = offsets_rcv[size]; 278ce605777SToby Isaac /* all to all exchange */ 2795f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nrecv, &buffer)); 2805f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm)); 2815f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(pivots)); 2825f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv)); 283ce605777SToby Isaac 284ce605777SToby Isaac /* local sort */ 2855f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortInt(nrecv, buffer)); 286ce605777SToby Isaac #if defined(PETSC_USE_DEBUG) 287ce605777SToby Isaac { 288ce605777SToby Isaac PetscBool sorted; 289ce605777SToby Isaac 2905f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted)); 291*28b400f6SJacob Faibussowitsch PetscCheck(sorted,mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed"); 292ce605777SToby Isaac } 293ce605777SToby Isaac #endif 294ce605777SToby Isaac 295ce605777SToby Isaac /* redistribute to the desired order */ 2965f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelRedistribute(mapout, nrecv, buffer, keysout)); 2975f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(buffer)); 298ce605777SToby Isaac PetscFunctionReturn(0); 299ce605777SToby Isaac } 300ce605777SToby Isaac 301ce605777SToby Isaac /*@ 302064ec1f3Sprj- PetscParallelSortInt - Globally sort a distributed array of integers 303ce605777SToby Isaac 304ce605777SToby Isaac Collective 305ce605777SToby Isaac 306ce605777SToby Isaac Input Parameters: 307ce605777SToby Isaac + mapin - PetscLayout describing the distribution of the input keys 308ce605777SToby Isaac . mapout - PetscLayout describing the distribution of the output keys 309ce605777SToby Isaac - keysin - the pre-sorted array of integers 310ce605777SToby Isaac 311064ec1f3Sprj- Output Parameter: 312ce605777SToby Isaac . keysout - the array in which the sorted integers will be stored. If mapin == mapout, then keysin may be equal to keysout. 313ce605777SToby Isaac 314ce605777SToby Isaac Level: developer 315ce605777SToby Isaac 316ce605777SToby Isaac Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does: 317ce605777SToby Isaac 318ce605777SToby Isaac - sorts locally 319ce605777SToby Isaac - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those 320ce605777SToby Isaac - using to the pivots to repartition the keys by all-to-all exchange 321ce605777SToby Isaac - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout) 322ce605777SToby Isaac - redistributing to match the mapout layout 323ce605777SToby Isaac 324ce605777SToby Isaac If keysin != keysout, then keysin will not be changed during PetscParallelSortInt. 325ce605777SToby Isaac 326ce605777SToby Isaac .seealso: PetscParallelSortedInt() 327ce605777SToby Isaac @*/ 328ce605777SToby Isaac PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 329ce605777SToby Isaac { 330ce605777SToby Isaac PetscMPIInt size; 331ce605777SToby Isaac PetscMPIInt result; 332ce605777SToby Isaac PetscInt *keysincopy = NULL; 333ce605777SToby Isaac 334ce605777SToby Isaac PetscFunctionBegin; 335ce605777SToby Isaac PetscValidPointer(mapin, 1); 336ce605777SToby Isaac PetscValidPointer(mapout, 2); 3375f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result)); 3382c71b3e2SJacob Faibussowitsch PetscCheckFalse(result != MPI_IDENT && result != MPI_CONGRUENT,mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator"); 3395f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutSetUp(mapin)); 3405f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutSetUp(mapout)); 341ce605777SToby Isaac if (mapin->n) PetscValidIntPointer(keysin, 3); 342ce605777SToby Isaac if (mapout->n) PetscValidIntPointer(keysout, 4); 3432c71b3e2SJacob Faibussowitsch PetscCheckFalse(mapin->N != mapout->N,mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N); 3445f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(mapin->comm, &size)); 345ce605777SToby Isaac if (size == 1) { 346ce605777SToby Isaac if (keysout != keysin) { 3475f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt))); 348ce605777SToby Isaac } 3495f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortInt(mapout->n, keysout)); 350ce605777SToby Isaac if (size == 1) PetscFunctionReturn(0); 351ce605777SToby Isaac } 352ce605777SToby Isaac if (keysout != keysin) { 3535f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(mapin->n, &keysincopy)); 3545f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt))); 355ce605777SToby Isaac keysin = keysincopy; 356ce605777SToby Isaac } 3575f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout)); 358ce605777SToby Isaac #if defined(PETSC_USE_DEBUG) 359ce605777SToby Isaac { 360ce605777SToby Isaac PetscBool sorted; 361ce605777SToby Isaac 3625f80ce2aSJacob Faibussowitsch CHKERRQ(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted)); 363*28b400f6SJacob Faibussowitsch PetscCheck(sorted,mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed"); 364ce605777SToby Isaac } 365ce605777SToby Isaac #endif 3665f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(keysincopy)); 367ce605777SToby Isaac PetscFunctionReturn(0); 368ce605777SToby Isaac } 369