Actual source code: sortso.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5: {
6: PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7: return (l < r) ? -1 : (l > r);
8: }
10: static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11: {
12: PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13: return (l < r) ? -1 : (l > r);
14: }
16: static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17: {
18: PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19: return (l < r) ? -1 : (l > r);
20: }
22: #define MIN_GALLOP_CONST_GLOBAL 8
23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24: typedef int (*CompFunc)(const void *, const void *, void *);
26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27: #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29: {
30: __builtin_memcpy(t, b, size);
31: __builtin_memmove(b, a, size);
32: __builtin_memcpy(a, t, size);
33: return;
34: }
36: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37: {
38: __builtin_memcpy(t, ar, asize);
39: __builtin_memmove(ar, al, asize);
40: __builtin_memcpy(al, t, asize);
41: __builtin_memcpy(t, br, bsize);
42: __builtin_memmove(br, bl, bsize);
43: __builtin_memcpy(bl, t, bsize);
44: return;
45: }
47: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
48: {
49: __builtin_memcpy(dest, src, size);
50: return;
51: }
53: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54: {
55: __builtin_memcpy(adest, asrc, asize);
56: __builtin_memcpy(bdest, bsrc, bsize);
57: return;
58: }
60: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
61: {
62: __builtin_memmove(dest, src, size);
63: return;
64: }
66: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67: {
68: __builtin_memmove(adest, asrc, asize);
69: __builtin_memmove(bdest, bsrc, bsize);
70: return;
71: }
72: # else
73: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74: {
75: PETSC_COMM_SELF,PetscMemcpy(t, b, size);
76: PETSC_COMM_SELF,PetscMemmove(b, a, size);
77: PETSC_COMM_SELF,PetscMemcpy(a, t, size);
78: return;
79: }
81: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
82: {
83: PETSC_COMM_SELF,PetscMemcpy(t, ar, asize);
84: PETSC_COMM_SELF,PetscMemmove(ar, al, asize);
85: PETSC_COMM_SELF,PetscMemcpy(al, t, asize);
86: PETSC_COMM_SELF,PetscMemcpy(t, br, bsize);
87: PETSC_COMM_SELF,PetscMemmove(br, bl, bsize);
88: PETSC_COMM_SELF,PetscMemcpy(bl, t, bsize);
89: return;
90: }
92: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
93: {
94: PETSC_COMM_SELF,PetscMemcpy(dest, src, size);
95: return;
96: }
98: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
99: {
100: PETSC_COMM_SELF,PetscMemcpy(adest, asrc, asize);
101: PETSC_COMM_SELF,PetscMemcpy(bdest, bsrc, bsize);
102: return;
103: }
105: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
106: {
107: PETSC_COMM_SELF,PetscMemmove(dest, src, size);
108: return;
109: }
111: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
112: {
113: PETSC_COMM_SELF,PetscMemmove(adest, asrc, asize);
114: PETSC_COMM_SELF,PetscMemmove(bdest, bsrc, bsize);
115: return;
116: }
117: #endif
119: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
120: x. Output also inclusive.
122: NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
124: {0,1,2,3,4,5}
126: when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
127: */
128: static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
129: {
130: PetscInt last = l, k = 1, mid, cur = l+1;
132: *m = l;
133: PetscAssert(r >= l,PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft",r,l);
134: if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; return 0;}
135: if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) return 0;
136: while (PETSC_TRUE) {
137: if (PetscUnlikely(cur > r)) {cur = r; break;}
138: if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
139: last = cur;
140: cur += (k <<= 1) + 1;
141: ++k;
142: }
143: /* standard binary search but take last 0 mid 0 cur 1 into account*/
144: while (cur > last + 1) {
145: mid = last + ((cur - last) >> 1);
146: if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
147: cur = mid;
148: } else {
149: last = mid;
150: }
151: }
152: *m = cur;
153: return 0;
154: }
156: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
157: < x. Output also inclusive */
158: static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
159: {
160: PetscInt last = r, k = 1, mid, cur = r-1;
162: *m = r;
163: PetscAssert(r >= l,PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight",r,l);
164: if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; return 0;}
165: if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) return 0;
166: while (PETSC_TRUE) {
167: if (PetscUnlikely(cur < l)) {cur = l; break;}
168: if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
169: last = cur;
170: cur -= (k <<= 1) + 1;
171: ++k;
172: }
173: /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
174: while (last > cur + 1) {
175: mid = last - ((last - cur) >> 1);
176: if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
177: cur = mid;
178: } else {
179: last = mid;
180: }
181: }
182: *m = cur;
183: return 0;
184: }
186: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
187: complete array, left is first index of left array, mid is first index of right array, right is last index of right
188: array */
189: static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
190: {
191: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
192: const PetscInt llen = mid-left;
194: Petsc_memcpy(tarr, arr+(left*size), llen*size);
195: while ((i < llen) && (j <= right)) {
196: if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
197: Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
198: ++k;
199: gallopright = 0;
200: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
201: PetscInt l1, l2, diff1, diff2;
202: do {
203: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
204: /* search temp for right[j], can move up to that of temp into arr immediately */
205: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
206: diff1 = l1-i;
207: Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
208: k += diff1;
209: i = l1;
210: /* search right for temp[i], can move up to that many of right into arr */
211: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
212: diff2 = l2-j;
213: Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
214: k += diff2;
215: j = l2;
216: if (i >= llen || j > right) break;
217: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
218: ++MIN_GALLOP_GLOBAL;
219: }
220: } else {
221: Petsc_memmove(arr+(k*size), arr+(j*size), size);
222: ++k;
223: gallopleft = 0;
224: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
225: PetscInt l1, l2, diff1, diff2;
226: do {
227: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
228: /* search right for temp[i], can move up to that many of right into arr */
229: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
230: diff2 = l2-j;
231: Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
232: k += diff2;
233: j = l2;
234: /* search temp for right[j], can copy up to that of temp into arr immediately */
235: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
236: diff1 = l1-i;
237: Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
238: k += diff1;
239: i = l1;
240: if (i >= llen || j > right) break;
241: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
242: ++MIN_GALLOP_GLOBAL;
243: }
244: }
245: }
246: if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
247: return 0;
248: }
250: static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
251: {
252: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
253: const PetscInt llen = mid-left;
255: Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
256: while ((i < llen) && (j <= right)) {
257: if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
258: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
259: ++k;
260: gallopright = 0;
261: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
262: PetscInt l1, l2, diff1, diff2;
263: do {
264: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
265: /* search temp for right[j], can move up to that of temp into arr immediately */
266: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
267: diff1 = l1-i;
268: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
269: k += diff1;
270: i = l1;
271: /* search right for temp[i], can move up to that many of right into arr */
272: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
273: diff2 = l2-j;
274: Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
275: k += diff2;
276: j = l2;
277: if (i >= llen || j > right) break;
278: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
279: ++MIN_GALLOP_GLOBAL;
280: }
281: } else {
282: Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
283: ++k;
284: gallopleft = 0;
285: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
286: PetscInt l1, l2, diff1, diff2;
287: do {
288: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
289: /* search right for temp[i], can move up to that many of right into arr */
290: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
291: diff2 = l2-j;
292: Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293: k += diff2;
294: j = l2;
295: /* search temp for right[j], can copy up to that of temp into arr immediately */
296: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
297: diff1 = l1-i;
298: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
299: k += diff1;
300: i = l1;
301: if (i >= llen || j > right) break;
302: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
303: ++MIN_GALLOP_GLOBAL;
304: }
305: }
306: }
307: if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
308: return 0;
309: }
311: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
312: complete array, left is first index of left array, mid is first index of right array, right is last index of right
313: array */
314: static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
315: {
316: PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
317: const PetscInt rlen = right-mid+1;
319: Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
320: while ((i >= 0) && (j >= left)) {
321: if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
322: Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
323: --k;
324: gallopleft = 0;
325: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
326: PetscInt l1, l2, diff1, diff2;
327: do {
328: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
329: /* search temp for left[j], can copy up to that many of temp into arr */
330: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
331: diff1 = i-l1;
332: Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
333: k -= diff1;
334: i = l1;
335: /* search left for temp[i], can move up to that many of left up arr */
336: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
337: diff2 = j-l2;
338: Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
339: k -= diff2;
340: j = l2;
341: if (i < 0 || j < left) break;
342: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
343: ++MIN_GALLOP_GLOBAL;
344: }
345: } else {
346: Petsc_memmove((arr)+k*size, (arr)+j*size, size);
347: --k;
348: gallopright = 0;
349: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
350: PetscInt l1, l2, diff1, diff2;
351: do {
352: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
353: /* search left for temp[i], can move up to that many of left up arr */
354: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
355: diff2 = j-l2;
356: Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
357: k -= diff2;
358: j = l2;
359: /* search temp for left[j], can copy up to that many of temp into arr */
360: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
361: diff1 = i-l1;
362: Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
363: k -= diff1;
364: i = l1;
365: if (i < 0 || j < left) break;
366: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
367: ++MIN_GALLOP_GLOBAL;
368: }
369: }
370: }
371: if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
372: return 0;
373: }
375: static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
376: {
377: PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
378: const PetscInt rlen = right-mid+1;
380: Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
381: while ((i >= 0) && (j >= left)) {
382: if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
383: Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
384: --k;
385: gallopleft = 0;
386: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
387: PetscInt l1, l2, diff1, diff2;
388: do {
389: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
390: /* search temp for left[j], can copy up to that many of temp into arr */
391: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
392: diff1 = i-l1;
393: Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
394: k -= diff1;
395: i = l1;
396: /* search left for temp[i], can move up to that many of left up arr */
397: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
398: diff2 = j-l2;
399: Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
400: k -= diff2;
401: j = l2;
402: if (i < 0 || j < left) break;
403: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
404: ++MIN_GALLOP_GLOBAL;
405: }
406: } else {
407: Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
408: --k;
409: gallopright = 0;
410: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
411: PetscInt l1, l2, diff1, diff2;
412: do {
413: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
414: /* search left for temp[i], can move up to that many of left up arr */
415: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
416: diff2 = j-l2;
417: Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
418: k -= diff2;
419: j = l2;
420: /* search temp for left[j], can copy up to that many of temp into arr */
421: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
422: diff1 = i-l1;
423: Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
424: k -= diff1;
425: i = l1;
426: if (i < 0 || j < left) break;
427: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
428: ++MIN_GALLOP_GLOBAL;
429: }
430: }
431: }
432: if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
433: return 0;
434: }
436: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
437: bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
438: static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
439: {
440: PetscInt i = start == left ? start+1 : start;
442: for (; i <= right; ++i) {
443: PetscInt j = i-1;
444: Petsc_memcpy(tarr, arr+(i*size), size);
445: while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
446: COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
447: --j;
448: }
449: Petsc_memcpy((arr)+(j+1)*size, tarr, size);
450: }
451: return 0;
452: }
454: static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
455: {
456: PetscInt i = start == left ? start+1 : start;
458: for (; i <= right; ++i) {
459: PetscInt j = i-1;
460: Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
461: while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
462: COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
463: --j;
464: }
465: Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
466: }
467: return 0;
468: }
470: /* See PetscInsertionSort_Private */
471: static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
472: {
473: PetscInt i = start == left ? start+1 : start;
475: for (; i <= right; ++i) {
476: PetscInt l = left, r = i;
477: Petsc_memcpy(tarr, arr+(i*size), size);
478: do {
479: const PetscInt m = l + ((r - l) >> 1);
480: if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
481: r = m;
482: } else {
483: l = m + 1;
484: }
485: } while (l < r);
486: Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
487: Petsc_memcpy(arr+(l*size), tarr, size);
488: }
489: return 0;
490: }
492: static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
493: {
494: PetscInt i = start == left ? start+1 : start;
496: for (; i <= right; ++i) {
497: PetscInt l = left, r = i;
498: Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
499: do {
500: const PetscInt m = l + ((r - l) >> 1);
501: if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
502: r = m;
503: } else {
504: l = m + 1;
505: }
506: } while (l < r);
507: Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
508: Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
509: }
510: return 0;
511: }
513: typedef struct {
514: PetscInt size;
515: PetscInt start;
516: #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
517: } PetscTimSortStack;
518: #else
519: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
520: #endif
522: typedef struct {
523: char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
524: size_t size;
525: size_t maxsize;
526: } PetscTimSortBuffer;
528: static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
529: {
530: if (PetscLikely(newSize <= buff->size)) return 0;
531: {
532: /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
533: size_t newMax = PetscMin(newSize*newSize, buff->maxsize);
534: PetscFree(buff->ptr);
535: PetscMalloc1(newMax, &buff->ptr);
536: buff->size = newMax;
537: }
538: return 0;
539: }
541: static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
542: {
543: for (;stacksize; --stacksize) {
544: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
545: if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
546: PetscInt l, m = stack[stacksize].start, r;
547: /* Search A for B[0] insertion */
548: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);
549: /* Search B for A[-1] insertion */
550: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);
551: if (m-l <= r-m) {
552: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
553: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
554: } else {
555: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
556: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
557: }
558: }
559: /* Update A with merge */
560: stack[stacksize-1].size += stack[stacksize].size;
561: }
562: return 0;
563: }
565: static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
566: {
567: for (;stacksize; --stacksize) {
568: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
569: if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
570: PetscInt l, m = stack[stacksize].start, r;
571: /* Search A for B[0] insertion */
572: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);
573: /* Search B for A[-1] insertion */
574: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);
575: if (m-l <= r-m) {
576: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
577: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
578: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
579: } else {
580: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
581: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
582: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
583: }
584: }
585: /* Update A with merge */
586: stack[stacksize-1].size += stack[stacksize].size;
587: }
588: return 0;
589: }
591: static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
592: {
593: PetscInt i = *stacksize;
595: while (i) {
596: PetscInt l, m, r, itemp = i;
598: if (i == 1) {
599: /* A = stack[i-1], B = stack[i] */
600: if (stack[i-1].size < stack[i].size) {
601: /* if A[-1] <= B[0] then sorted */
602: if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
603: m = stack[i].start;
604: /* Search A for B[0] insertion */
605: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);
606: /* Search B for A[-1] insertion */
607: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);
608: if (m-l <= r-m) {
609: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
610: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
611: } else {
612: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
613: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
614: }
615: }
616: /* Update A with merge */
617: stack[i-1].size += stack[i].size;
618: --i;
619: }
620: } else {
621: /* i > 2, i.e. C exists
622: A = stack[i-2], B = stack[i-1], C = stack[i]; */
623: if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
624: if (stack[i-2].size < stack[i].size) {
625: /* merge B into A, but if A[-1] <= B[0] then already sorted */
626: if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
627: m = stack[i-1].start;
628: /* Search A for B[0] insertion */
629: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);
630: /* Search B for A[-1] insertion */
631: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);
632: if (m-l <= r-m) {
633: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
634: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
635: } else {
636: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
637: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
638: }
639: }
640: /* Update A with merge */
641: stack[i-2].size += stack[i-1].size;
642: /* Push C up the stack */
643: stack[i-1].start = stack[i].start;
644: stack[i-1].size = stack[i].size;
645: } else {
646: /* merge C into B */
647: mergeBC:
648: /* If B[-1] <= C[0] then... you know the drill */
649: if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
650: m = stack[i].start;
651: /* Search B for C[0] insertion */
652: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);
653: /* Search C for B[-1] insertion */
654: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);
655: if (m-l <= r-m) {
656: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
657: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
658: } else {
659: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
660: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
661: }
662: }
663: /* Update B with merge */
664: stack[i-1].size += stack[i].size;
665: }
666: --i;
667: } else if (stack[i-1].size <= stack[i].size) {
668: /* merge C into B */
669: goto mergeBC;
670: }
671: }
672: if (itemp == i) break;
673: }
674: *stacksize = i;
675: return 0;
676: }
678: static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
679: {
680: PetscInt i = *stacksize;
682: while (i) {
683: PetscInt l, m, r, itemp = i;
685: if (i == 1) {
686: /* A = stack[i-1], B = stack[i] */
687: if (stack[i-1].size < stack[i].size) {
688: /* if A[-1] <= B[0] then sorted */
689: if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
690: m = stack[i].start;
691: /* Search A for B[0] insertion */
692: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);
693: /* Search B for A[-1] insertion */
694: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);
695: if (m-l <= r-m) {
696: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
697: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
698: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
699: } else {
700: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
701: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
702: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
703: }
704: }
705: /* Update A with merge */
706: stack[i-1].size += stack[i].size;
707: --i;
708: }
709: } else {
710: /* i > 2, i.e. C exists
711: A = stack[i-2], B = stack[i-1], C = stack[i]; */
712: if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
713: if (stack[i-2].size < stack[i].size) {
714: /* merge B into A, but if A[-1] <= B[0] then already sorted */
715: if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
716: m = stack[i-1].start;
717: /* Search A for B[0] insertion */
718: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);
719: /* Search B for A[-1] insertion */
720: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);
721: if (m-l <= r-m) {
722: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
723: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
724: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
725: } else {
726: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
727: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
728: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
729: }
730: }
731: /* Update A with merge */
732: stack[i-2].size += stack[i-1].size;
733: /* Push C up the stack */
734: stack[i-1].start = stack[i].start;
735: stack[i-1].size = stack[i].size;
736: } else {
737: /* merge C into B */
738: mergeBC:
739: /* If B[-1] <= C[0] then... you know the drill */
740: if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
741: m = stack[i].start;
742: /* Search B for C[0] insertion */
743: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);
744: /* Search C for B[-1] insertion */
745: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);
746: if (m-l <= r-m) {
747: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
748: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
749: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
750: } else {
751: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
752: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
753: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
754: }
755: }
756: /* Update B with merge */
757: stack[i-1].size += stack[i].size;
758: }
759: --i;
760: } else if (stack[i-1].size <= stack[i].size) {
761: /* merge C into B */
762: goto mergeBC;
763: }
764: }
765: if (itemp == i) break;
766: }
767: *stacksize = i;
768: return 0;
769: }
771: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
772: elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
773: binary insertion sort or regulat insertion sort */
774: static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
775: {
776: const PetscInt re = PetscMin(runstart+minrun, n-1);
777: PetscInt ri = runstart;
779: if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return 0;}
780: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
781: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
782: ++ri;
783: while (ri < n-1) {
784: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
785: ++ri;
786: }
787: {
788: PetscInt lo = runstart, hi = ri;
789: for (; lo < hi; ++lo, --hi) {
790: COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
791: }
792: }
793: } else {
794: ++ri;
795: while (ri < n-1) {
796: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
797: ++ri;
798: }
799: }
800: if (ri < re) {
801: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
802: binary search */
803: if (ri-runstart <= minrun >> 1) {
804: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
805: PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
806: } else {
807: PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
808: }
809: *runend = re;
810: } else *runend = ri;
811: return 0;
812: }
814: static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
815: {
816: const PetscInt re = PetscMin(runstart+minrun, n-1);
817: PetscInt ri = runstart;
819: if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return 0;}
820: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
821: if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
822: ++ri;
823: while (ri < n-1) {
824: if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
825: ++ri;
826: }
827: {
828: PetscInt lo = runstart, hi = ri;
829: for (; lo < hi; ++lo, --hi) {
830: COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
831: }
832: }
833: } else {
834: ++ri;
835: while (ri < n-1) {
836: if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
837: ++ri;
838: }
839: }
840: if (ri < re) {
841: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
842: binary search */
843: if (ri-runstart <= minrun >> 1) {
844: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
845: PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
846: } else {
847: PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
848: }
849: *runend = re;
850: } else *runend = ri;
851: return 0;
852: }
854: /*@C
855: PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
857: Not Collective
859: Input Parameters:
860: + n - number of values
861: . arr - array to be sorted
862: . size - size in bytes of the datatype held in arr
863: . cmp - function pointer to comparison function
864: - ctx - optional context to be passed to comparison function, NULL if not needed
866: Output Parameters:
867: . arr - sorted array
869: Notes:
870: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
871: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
872: select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
873: do so it repeatedly triggers attempts throughout to merge adjacent runs.
875: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
876: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
877: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
878: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
879: likely all contain similar data.
881: Sample usage:
882: The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
883: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
884: may also
885: change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
886: the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
887: order
889: .vb
890: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
891: my_type l = *(my_type *) left, r = *(my_type *) right;
892: return (l < r) ? -1 : (l > r);
893: }
894: .ve
895: Note the context is unused here but you may use it to pass and subsequently access whatever information required
896: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
897: Then pass the function
898: .vb
899: PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
900: .ve
902: Fortran Notes:
903: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
904: returns result. For example
905: .vb
906: subroutine CompareIntegers(left,right,ctx,result)
907: implicit none
909: PetscInt,intent(in) :: left, right
910: type(UserCtx) :: ctx
911: integer,intent(out) :: result
913: if (left < right) then
914: result = -1
915: else if (left == right) then
916: result = 0
917: else
918: result = 1
919: end if
920: return
921: end subroutine CompareIntegers
922: .ve
924: References:
925: . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt
927: Level: developer
929: .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
930: @*/
931: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
932: {
933: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
934: PetscTimSortStack runstack[128];
935: PetscTimSortBuffer buff;
936: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
937: It is so unlikely that this limit is reached that this is __never__ checked for */
939: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
940: is a power of 2 or one plus a power of 2 */
941: {
942: PetscInt t = n, r = 0;
943: /* r becomes 1 if the least significant bits contain at least one off bit */
944: while (t >= 64) {
945: r |= t & 1;
946: t >>= 1;
947: }
948: minrun = t + r;
949: }
950: if (PetscDefined(USE_DEBUG)) {
951: PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);
952: if (n < 64) {
953: PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n);
955: }
956: PetscMalloc1((size_t) minrun*size, &buff.ptr);
957: buff.size = (size_t) minrun*size;
958: buff.maxsize = (size_t) n*size;
959: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
960: while (runstart < n) {
961: /* Check if additional entries are at least partially ordered and build natural run */
962: PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
963: runstack[stacksize].start = runstart;
964: runstack[stacksize].size = runend-runstart+1;
965: PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
966: ++stacksize;
967: runstart = runend+1;
968: }
969: /* Have been inside while, so discard last stacksize++ */
970: --stacksize;
971: PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
972: PetscFree(buff.ptr);
973: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
974: return 0;
975: }
977: /*@C
978: PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
979: reorders a second array to match the first. The arrays need not be the same type.
981: Not Collective
983: Input Parameters:
984: + n - number of values
985: . asize - size in bytes of the datatype held in arr
986: . bsize - size in bytes of the datatype held in barr
987: . cmp - function pointer to comparison function
988: - ctx - optional context to be passed to comparison function, NULL if not needed
990: Input/Output Parameters:
991: + arr - array to be sorted, on output it is sorted
992: - barr - array to be reordered, on output it is reordered
994: Notes:
995: The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
996: overlap.
998: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
999: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1000: to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1001: arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1003: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1004: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1005: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1006: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1007: likely all contain similar data.
1009: Sample usage:
1010: The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1011: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1012: may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1013: guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1014: increasing order
1016: .vb
1017: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1018: my_type l = *(my_type *) left, r = *(my_type *) right;
1019: return (l < r) ? -1 : (l > r);
1020: }
1021: .ve
1022: Note the context is unused here but you may use it to pass and subsequently access whatever information required
1023: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1024: Then pass the function
1025: .vb
1026: PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1027: .ve
1029: Fortran Notes:
1030: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1031: returns result. For example
1032: .vb
1033: subroutine CompareIntegers(left,right,ctx,result)
1034: implicit none
1036: PetscInt,intent(in) :: left, right
1037: type(UserCtx) :: ctx
1038: integer,intent(out) :: result
1040: if (left < right) then
1041: result = -1
1042: else if (left == right) then
1043: result = 0
1044: else
1045: result = 1
1046: end if
1047: return
1048: end subroutine CompareIntegers
1049: .ve
1051: References:
1052: . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1054: Level: developer
1056: .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1057: @*/
1058: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1059: {
1060: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
1061: PetscTimSortStack runstack[128];
1062: PetscTimSortBuffer abuff, bbuff;
1063: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1064: It is so unlikely that this limit is reached that this is __never__ checked for */
1066: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1067: is a power of 2 or one plus a power of 2 */
1068: {
1069: PetscInt t = n, r = 0;
1070: /* r becomes 1 if the least significant bits contain at least one off bit */
1071: while (t >= 64) {
1072: r |= t & 1;
1073: t >>= 1;
1074: }
1075: minrun = t + r;
1076: }
1077: if (PetscDefined(USE_DEBUG)) {
1078: PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);
1080: }
1081: PetscMalloc1((size_t) minrun*asize, &abuff.ptr);
1082: abuff.size = (size_t) minrun*asize;
1083: abuff.maxsize = (size_t) n*asize;
1084: PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);
1085: bbuff.size = (size_t) minrun*bsize;
1086: bbuff.maxsize = (size_t) n*bsize;
1087: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1088: while (runstart < n) {
1089: /* Check if additional entries are at least partially ordered and build natural run */
1090: PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1091: runstack[stacksize].start = runstart;
1092: runstack[stacksize].size = runend-runstart+1;
1093: PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1094: ++stacksize;
1095: runstart = runend+1;
1096: }
1097: /* Have been inside while, so discard last stacksize++ */
1098: --stacksize;
1099: PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1100: PetscFree(abuff.ptr);
1101: PetscFree(bbuff.ptr);
1102: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1103: return 0;
1104: }
1106: /*@
1107: PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1109: Not Collective
1111: Input Parameters:
1112: + n - number of values
1113: - arr - array of integers
1115: Output Parameters:
1116: . arr - sorted array of integers
1118: Notes:
1119: If the array is less than 64 entries long PetscSortInt() is automatically used.
1121: This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1122: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1123: recommended that the user benchmark their code to see which routine is fastest.
1125: Level: intermediate
1127: .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1128: @*/
1129: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1130: {
1131: if (n <= 1) return 0;
1133: if (n < 64) {
1134: PetscSortInt(n, arr);
1135: } else {
1136: PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1137: }
1138: return 0;
1139: }
1141: /*@
1142: PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1143: array to match the first.
1145: Not Collective
1147: Input Parameter:
1148: . n - number of values
1150: Input/Output Parameters:
1151: + arr1 - array of integers to be sorted, modified on output
1152: - arr2 - array of integers to be reordered, modified on output
1154: Notes:
1155: The arrays CANNOT overlap.
1157: This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1158: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1159: recommended that the user benchmark their code to see which routine is fastest.
1161: Level: intermediate
1163: .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1164: @*/
1165: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1166: {
1167: if (n <= 1) return 0;
1170: /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1171: PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1172: return 0;
1173: }
1175: /*@
1176: PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1178: Not Collective
1180: Input Parameters:
1181: + n - number of values
1182: - arr - array of PetscMPIInts
1184: Output Parameters:
1185: . arr - sorted array of integers
1187: Notes:
1188: If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1190: This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1191: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1192: recommended that the user benchmark their code to see which routine is fastest.
1194: Level: intermediate
1196: .seealso: PetscTimSort(), PetscSortMPIInt()
1197: @*/
1198: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1199: {
1200: if (n <= 1) return 0;
1202: if (n < 64) {
1203: PetscSortMPIInt(n, arr);
1204: } else {
1205: PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1206: }
1207: return 0;
1208: }
1210: /*@
1211: PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1212: array to match the first.
1214: Not Collective
1216: Input Parameter:
1217: . n - number of values
1219: Input/Output Parameters:
1220: . arr1 - array of integers to be sorted, modified on output
1221: - arr2 - array of integers to be reordered, modified on output
1223: Notes:
1224: The arrays CANNOT overlap.
1226: This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1227: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1228: recommended that the user benchmark their code to see which routine is fastest.
1230: Level: intermediate
1232: .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1233: @*/
1234: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1235: {
1236: if (n <= 1) return 0;
1239: /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1240: PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1241: return 0;
1242: }
1244: /*@
1245: PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1247: Not Collective
1249: Input Parameters:
1250: + n - number of values
1251: - arr - array of PetscReals
1253: Output Parameters:
1254: . arr - sorted array of integers
1256: Notes:
1257: If the array is less than 64 entries long PetscSortReal() is automatically used.
1259: This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1260: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1261: recommended that the user benchmark their code to see which routine is fastest.
1263: Level: intermediate
1265: .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1266: @*/
1267: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1268: {
1269: if (n <= 1) return 0;
1271: if (n < 64) {
1272: PetscSortReal(n, arr);
1273: } else {
1274: PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1275: }
1276: return 0;
1277: }
1279: /*@
1280: PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1281: array of PetscInts to match the first.
1283: Not Collective
1285: Input Parameter:
1286: . n - number of values
1288: Input/Output Parameters:
1289: . arr1 - array of PetscReals to be sorted, modified on output
1290: - arr2 - array of PetscReals to be reordered, modified on output
1292: Notes:
1293: This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1294: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1295: recommended that the user benchmark their code to see which routine is fastest.
1297: Level: intermediate
1299: .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1300: @*/
1301: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1302: {
1303: if (n <= 1) return 0;
1306: /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1307: PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1308: return 0;
1309: }