/* psort.c - PETSc parallel sort implementation (petsc-3.13.6, 2020-09-29) */

#include <petsc/private/petscimpl.h>
#include <petscis.h>

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  diff = rankEnd - rankStart;
  if (diff <= 0) return(0);
  if (diff == 1) {
    if (forward) {
      PetscSortInt((PetscInt) n, keys);
    } else {
      PetscSortReverseInt((PetscInt) n, keys);
    }
    return(0);
  }
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);
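    /* Compare-split step: with forward == PETSC_TRUE, the lower rank keeps the elementwise
     * minima and the higher rank keeps the elementwise maxima.  For example, with n = 3,
     * rank r holding {1, 5, 9} and its partner r + split holding {2, 3, 7}, rank r ends up
     * with {1, 3, 7} and the partner with {2, 5, 9}. */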
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
      }
    } else {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
      }
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);
  } else {
    PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);
  }
  return(0);
}

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  diff = rankEnd - rankStart;
  if (diff <= 0) return(0);
  if (diff == 1) {
    if (forward) {
      PetscSortInt(n, keys);
    } else {
      PetscSortReverseInt(n, keys);
    }
    return(0);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
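  /* The lower half of the ranks is sorted in the direction opposite to the upper half, so that
   * together the two halves form a bitonic sequence for the merge below. */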
  if (rank < mid) {
    PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool) !forward);
  } else {
    PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);
  }
  /* bitonic merge */
  PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);
  return(0);
}

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt    *buffer;

  PetscCommGetNewTag(comm, &tag);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  PetscMPIIntCast(n, &mpin);
  PetscMalloc1(n, &buffer);
  PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);
  PetscFree(buffer);
  return(0);
}

static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt    non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  MPI_Comm_size(mapin->comm, &size);
  MPI_Comm_rank(mapin->comm, &rank);
  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscMalloc1(size - 1, &pivots);
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N);
      index = PetscMin(index, (mapin->n - 1));
      index = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);
    if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }
#endif

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
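  /* Worked example: with P = 4 processes of which Z = 3 are nonempty, the 12 sorted candidates
   * (3 per process) end with 3 copies of PETSC_MAX_INT, leaving 9 real pivots at global indices
   * 0..8; the samples are the candidates at global indices 2, 5, and 8, i.e.
   * sample_rank = sample / (P - 1) picks one sample from each of ranks 0, 1, and 2. */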
  non_empty = size;
  for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i+1]) non_empty--;
  PetscCalloc1(size + 1, &keys_per);
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt) sample_rank == rank) {
        my_first = (PetscInt) (sample - sample_rank * (size - 1));
      }
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscMalloc1(size * max_keys_per, &finalpivots);
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm);
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) {
        finalpivots[count++] = finalpivots[i * max_keys_per + j];
      }
    }
  }
  *outpivots = finalpivots;
  PetscFree(keys_per);
  PetscFree(pivots);
  return(0);
}
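
/* Redistribute an array that is distributed by contiguous global index (on input each process
 * owns the slots [myOffset, myOffset + n) of the concatenated array) into the distribution
 * described by map, so that on output process i owns [map->range[i], map->range[i + 1]).
 * Each receiving process posts one wildcard receive for its whole output block: the process that
 * owns the start of that block answers with a "first" message, and the remaining overlapping
 * processes answer, in rank order, with "second" messages (possibly empty, so that the receiver
 * can probe senders consecutively). */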
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt size, rank;
  PetscInt    myOffset, nextOffset;
  PetscInt    i;
  PetscMPIInt total, filled;
  PetscMPIInt sender, nfirst, nsecond;
  PetscMPIInt firsttag, secondtag;
  MPI_Request firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status  firststatus;

  MPI_Comm_size(map->comm, &size);
  MPI_Comm_rank(map->comm, &rank);
  PetscCommGetNewTag(map->comm, &firsttag);
  PetscCommGetNewTag(map->comm, &secondtag);
  myOffset = 0;
  PetscMalloc2(size, &firstreqs, size, &secondreqs);
  MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);
  myOffset = nextOffset - n;
  total = map->range[rank + 1] - map->range[rank];
  if (total > 0) {
    MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);
  }
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));
    } else if (overlap > 0) {
      /* send second message */
      MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    MPI_Wait(&firstreqrcv, &firststatus);
    sender = firststatus.MPI_SOURCE;
    MPI_Get_count(&firststatus, MPIU_INT, &filled);
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);
    MPI_Get_count(&stat, MPIU_INT, &mfilled);
    filled += mfilled;
  }
  MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);
  MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);
  PetscFree2(firstreqs, secondreqs);
  return(0);
}

static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt    i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  MPI_Comm_size(mapin->comm, &size);
  MPI_Comm_rank(mapin->comm, &rank);
  PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);
  /* sort locally */
  PetscSortInt(mapin->n, keysin);
  /* get P - 1 pivots */
  PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i]  = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1]  = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size]      = mapin->n;
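  /* Worked example: with P = 4, pivots {10, 20, 30}, and local sorted keys {5, 12, 18, 25, 40},
   * this produces keys_per_snd = {1, 2, 1, 1} and offsets_snd = {0, 1, 3, 4, 5}: {5} goes to
   * rank 0, {12, 18} to rank 1, {25} to rank 2, and {40} to rank 3. */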
  /* get the incoming sizes */
  MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) {
    offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i];
  }
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscMalloc1(nrecv, &buffer);
  MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);
  PetscFree(pivots);
  PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);

  /* local sort */
  PetscSortInt(nrecv, buffer);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);
    if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscParallelRedistribute(mapout, nrecv, buffer, keysout);
  PetscFree(buffer);
  return(0);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin - PetscLayout describing the distribution of the input keys
. mapout - PetscLayout describing the distribution of the output keys
- keysin - the array of keys before sorting

  Output Parameter:
. keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does the following:

  - sorts locally
  - chooses pivots by sorting (in parallel) (P - 1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P - 1) of those
  - uses the pivots to repartition the keys by all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes to match the mapout layout

  If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.
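
  Example Usage:
  A minimal sketch (the communicator comm, local size nlocal, and array names below are placeholders),
  sorting with the same layout for input and output:

.vb
  PetscLayout map;
  PetscInt    *keys, *sorted;

  PetscLayoutCreate(comm, &map);
  PetscLayoutSetLocalSize(map, nlocal);
  PetscLayoutSetUp(map);
  PetscMalloc2(nlocal, &keys, nlocal, &sorted);
  /* ... fill keys[] with this process's (unsorted) values ... */
  PetscParallelSortInt(map, map, keys, sorted);
  PetscFree2(keys, sorted);
  PetscLayoutDestroy(&map);
.ve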

.seealso: PetscParallelSortedInt()
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt    *keysincopy = NULL;

  MPI_Comm_compare(mapin->comm, mapout->comm, &result);
  if (result != MPI_IDENT && result != MPI_CONGRUENT) SETERRQ(mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  PetscLayoutSetUp(mapin);
  PetscLayoutSetUp(mapout);
  if (mapin->N != mapout->N) SETERRQ2(mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%D != %D)", mapin->N, mapout->N);
  MPI_Comm_size(mapin->comm, &size);
  if (size == 1) {
    if (keysout != keysin) {
      PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));
    }
    PetscSortInt(mapout->n, keysout);
    return(0);
  }
  if (keysout != keysin) {
    PetscMalloc1(mapin->n, &keysincopy);
    PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));
    keysin = keysincopy;
  }
  PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);
    if (!sorted) SETERRQ(mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscFree(keysincopy);
  return(0);
}