Actual source code: sortso.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5: {
6: PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7: return (l < r) ? -1 : (l > r);
8: }
10: PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11: {
12: PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13: return (l < r) ? -1 : (l > r);
14: }
16: PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17: {
18: PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19: return (l < r) ? -1 : (l > r);
20: }
22: #define MIN_GALLOP_CONST_GLOBAL 8
23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24: typedef int (*CompFunc)(const void *, const void *, void *);
26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27: #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29: {
30: __builtin_memcpy(t, b, size);
31: __builtin_memmove(b, a, size);
32: __builtin_memcpy(a, t, size);
33: return;
34: }
36: PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37: {
38: __builtin_memcpy(t, ar, asize);
39: __builtin_memmove(ar, al, asize);
40: __builtin_memcpy(al, t, asize);
41: __builtin_memcpy(t, br, bsize);
42: __builtin_memmove(br, bl, bsize);
43: __builtin_memcpy(bl, t, bsize);
44: return;
45: }
47: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48: {
49: __builtin_memcpy(dest, src, size);
50: return;
51: }
53: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54: {
55: __builtin_memcpy(adest, asrc, asize);
56: __builtin_memcpy(bdest, bsrc, bsize);
57: return;
58: }
60: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61: {
62: __builtin_memmove(dest, src, size);
63: return;
64: }
66: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67: {
68: __builtin_memmove(adest, asrc, asize);
69: __builtin_memmove(bdest, bsrc, bsize);
70: return;
71: }
72: # else
73: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74: {
77: PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78: PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79: PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80: PetscFunctionReturnVoid();
81: }
83: PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
84: {
87: PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88: PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89: PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90: PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91: PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92: PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93: PetscFunctionReturnVoid();
94: }
96: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97: {
100: PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101: PetscFunctionReturnVoid();
102: }
104: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105: {
108: PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109: PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110: PetscFunctionReturnVoid();
111: }
113: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114: {
117: PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118: PetscFunctionReturnVoid();
119: }
121: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122: {
125: PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126: PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127: PetscFunctionReturnVoid();
128: }
129: #endif
131: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132: x. Output also inclusive.
134: NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
136: {0,1,2,3,4,5}
138: when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139: */
140: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141: {
142: PetscInt last = l, k = 1, mid, cur = l+1;
145: *m = l;
146: if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147: if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; return(0);}
148: if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) return(0);
149: while (PETSC_TRUE) {
150: if (PetscUnlikely(cur > r)) {cur = r; break;}
151: if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152: last = cur;
153: cur += (k <<= 1) + 1;
154: ++k;
155: }
156: /* standard binary search but take last 0 mid 0 cur 1 into account*/
157: while (cur > last + 1) {
158: mid = last + ((cur - last) >> 1);
159: if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160: cur = mid;
161: } else {
162: last = mid;
163: }
164: }
165: *m = cur;
166: return(0);
167: }
169: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170: < x. Output also inclusive */
171: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172: {
173: PetscInt last = r, k = 1, mid, cur = r-1;
176: *m = r;
177: if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178: if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; return(0);}
179: if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) return(0);
180: while (PETSC_TRUE) {
181: if (PetscUnlikely(cur < l)) {cur = l; break;}
182: if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183: last = cur;
184: cur -= (k <<= 1) + 1;
185: ++k;
186: }
187: /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188: while (last > cur + 1) {
189: mid = last - ((last - cur) >> 1);
190: if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191: cur = mid;
192: } else {
193: last = mid;
194: }
195: }
196: *m = cur;
197: return(0);
198: }
200: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201: complete array, left is first index of left array, mid is first index of right array, right is last index of right
202: array */
203: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204: {
205: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206: const PetscInt llen = mid-left;
210: Petsc_memcpy(tarr, arr+(left*size), llen*size);
211: while ((i < llen) && (j <= right)) {
212: if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213: Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214: ++k;
215: gallopright = 0;
216: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217: PetscInt l1, l2, diff1, diff2;
218: do {
219: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220: /* search temp for right[j], can move up to that of temp into arr immediately */
221: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
222: diff1 = l1-i;
223: Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224: k += diff1;
225: i = l1;
226: /* search right for temp[i], can move up to that many of right into arr */
227: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
228: diff2 = l2-j;
229: Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230: k += diff2;
231: j = l2;
232: if (i >= llen || j > right) break;
233: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234: ++MIN_GALLOP_GLOBAL;
235: }
236: } else {
237: Petsc_memmove(arr+(k*size), arr+(j*size), size);
238: ++k;
239: gallopleft = 0;
240: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241: PetscInt l1, l2, diff1, diff2;
242: do {
243: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244: /* search right for temp[i], can move up to that many of right into arr */
245: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
246: diff2 = l2-j;
247: Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248: k += diff2;
249: j = l2;
250: /* search temp for right[j], can copy up to that of temp into arr immediately */
251: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
252: diff1 = l1-i;
253: Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254: k += diff1;
255: i = l1;
256: if (i >= llen || j > right) break;
257: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258: ++MIN_GALLOP_GLOBAL;
259: }
260: }
261: }
262: if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263: return(0);
264: }
266: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267: {
268: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269: const PetscInt llen = mid-left;
273: Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274: while ((i < llen) && (j <= right)) {
275: if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277: ++k;
278: gallopright = 0;
279: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280: PetscInt l1, l2, diff1, diff2;
281: do {
282: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283: /* search temp for right[j], can move up to that of temp into arr immediately */
284: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
285: diff1 = l1-i;
286: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287: k += diff1;
288: i = l1;
289: /* search right for temp[i], can move up to that many of right into arr */
290: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
291: diff2 = l2-j;
292: Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293: k += diff2;
294: j = l2;
295: if (i >= llen || j > right) break;
296: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297: ++MIN_GALLOP_GLOBAL;
298: }
299: } else {
300: Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301: ++k;
302: gallopleft = 0;
303: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304: PetscInt l1, l2, diff1, diff2;
305: do {
306: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307: /* search right for temp[i], can move up to that many of right into arr */
308: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
309: diff2 = l2-j;
310: Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311: k += diff2;
312: j = l2;
313: /* search temp for right[j], can copy up to that of temp into arr immediately */
314: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
315: diff1 = l1-i;
316: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317: k += diff1;
318: i = l1;
319: if (i >= llen || j > right) break;
320: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321: ++MIN_GALLOP_GLOBAL;
322: }
323: }
324: }
325: if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326: return(0);
327: }
329: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330: complete array, left is first index of left array, mid is first index of right array, right is last index of right
331: array */
332: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333: {
334: PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335: const PetscInt rlen = right-mid+1;
339: Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340: while ((i >= 0) && (j >= left)) {
341: if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342: Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343: --k;
344: gallopleft = 0;
345: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346: PetscInt l1, l2, diff1, diff2;
347: do {
348: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349: /* search temp for left[j], can copy up to that many of temp into arr */
350: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
351: diff1 = i-l1;
352: Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353: k -= diff1;
354: i = l1;
355: /* search left for temp[i], can move up to that many of left up arr */
356: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
357: diff2 = j-l2;
358: Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359: k -= diff2;
360: j = l2;
361: if (i < 0 || j < left) break;
362: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363: ++MIN_GALLOP_GLOBAL;
364: }
365: } else {
366: Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367: --k;
368: gallopright = 0;
369: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370: PetscInt l1, l2, diff1, diff2;
371: do {
372: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373: /* search left for temp[i], can move up to that many of left up arr */
374: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
375: diff2 = j-l2;
376: Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377: k -= diff2;
378: j = l2;
379: /* search temp for left[j], can copy up to that many of temp into arr */
380: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
381: diff1 = i-l1;
382: Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383: k -= diff1;
384: i = l1;
385: if (i < 0 || j < left) break;
386: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387: ++MIN_GALLOP_GLOBAL;
388: }
389: }
390: }
391: if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392: return(0);
393: }
395: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396: {
397: PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398: const PetscInt rlen = right-mid+1;
402: Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403: while ((i >= 0) && (j >= left)) {
404: if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405: Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406: --k;
407: gallopleft = 0;
408: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409: PetscInt l1, l2, diff1, diff2;
410: do {
411: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412: /* search temp for left[j], can copy up to that many of temp into arr */
413: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
414: diff1 = i-l1;
415: Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416: k -= diff1;
417: i = l1;
418: /* search left for temp[i], can move up to that many of left up arr */
419: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
420: diff2 = j-l2;
421: Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422: k -= diff2;
423: j = l2;
424: if (i < 0 || j < left) break;
425: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426: ++MIN_GALLOP_GLOBAL;
427: }
428: } else {
429: Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430: --k;
431: gallopright = 0;
432: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433: PetscInt l1, l2, diff1, diff2;
434: do {
435: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436: /* search left for temp[i], can move up to that many of left up arr */
437: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
438: diff2 = j-l2;
439: Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440: k -= diff2;
441: j = l2;
442: /* search temp for left[j], can copy up to that many of temp into arr */
443: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
444: diff1 = i-l1;
445: Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446: k -= diff1;
447: i = l1;
448: if (i < 0 || j < left) break;
449: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450: ++MIN_GALLOP_GLOBAL;
451: }
452: }
453: }
454: if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455: return(0);
456: }
458: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459: bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461: {
462: PetscInt i = start == left ? start+1 : start;
465: for (; i <= right; ++i) {
466: PetscInt j = i-1;
467: Petsc_memcpy(tarr, arr+(i*size), size);
468: while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469: COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470: --j;
471: }
472: Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473: }
474: return(0);
475: }
477: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478: {
479: PetscInt i = start == left ? start+1 : start;
482: for (; i <= right; ++i) {
483: PetscInt j = i-1;
484: Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485: while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486: COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487: --j;
488: }
489: Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490: }
491: return(0);
492: }
494: /* See PetscInsertionSort_Private */
495: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496: {
497: PetscInt i = start == left ? start+1 : start;
500: for (; i <= right; ++i) {
501: PetscInt l = left, r = i;
502: Petsc_memcpy(tarr, arr+(i*size), size);
503: do {
504: const PetscInt m = l + ((r - l) >> 1);
505: if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506: r = m;
507: } else {
508: l = m + 1;
509: }
510: } while (l < r);
511: Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512: Petsc_memcpy(arr+(l*size), tarr, size);
513: }
514: return(0);
515: }
517: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518: {
519: PetscInt i = start == left ? start+1 : start;
522: for (; i <= right; ++i) {
523: PetscInt l = left, r = i;
524: Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525: do {
526: const PetscInt m = l + ((r - l) >> 1);
527: if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528: r = m;
529: } else {
530: l = m + 1;
531: }
532: } while (l < r);
533: Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534: Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535: }
536: return(0);
537: }
539: typedef struct {
540: PetscInt size;
541: PetscInt start;
542: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
544: typedef struct {
545: char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546: size_t size;
547: size_t maxsize;
548: } PetscTimSortBuffer;
550: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
551: {
553: if (PetscLikely(newSize <= buff->size)) return(0);
554: {
555: /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
557: size_t newMax = PetscMin(newSize*newSize, buff->maxsize);
558: PetscFree(buff->ptr);
559: PetscMalloc1(newMax, &buff->ptr);
560: buff->size = newMax;
561: }
562: return(0);
563: }
565: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
566: {
568: for (;stacksize; --stacksize) {
569: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570: if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
571: PetscInt l, m = stack[stacksize].start, r;
573: /* Search A for B[0] insertion */
574: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);
575: /* Search B for A[-1] insertion */
576: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);
577: if (m-l <= r-m) {
578: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
579: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
580: } else {
581: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
582: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
583: }
584: }
585: /* Update A with merge */
586: stack[stacksize-1].size += stack[stacksize].size;
587: }
588: return(0);
589: }
591: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
592: {
594: for (;stacksize; --stacksize) {
595: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596: if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
597: PetscInt l, m = stack[stacksize].start, r;
599: /* Search A for B[0] insertion */
600: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);
601: /* Search B for A[-1] insertion */
602: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);
603: if (m-l <= r-m) {
604: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
605: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
606: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
607: } else {
608: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
609: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
610: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
611: }
612: }
613: /* Update A with merge */
614: stack[stacksize-1].size += stack[stacksize].size;
615: }
616: return(0);
617: }
619: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
620: {
621: PetscInt i = *stacksize;
625: while (i) {
626: PetscInt l, m, r, itemp = i;
628: if (i == 1) {
629: /* A = stack[i-1], B = stack[i] */
630: if (stack[i-1].size < stack[i].size) {
631: /* if A[-1] <= B[0] then sorted */
632: if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
633: m = stack[i].start;
634: /* Search A for B[0] insertion */
635: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);
636: /* Search B for A[-1] insertion */
637: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);
638: if (m-l <= r-m) {
639: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
640: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
641: } else {
642: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
643: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
644: }
645: }
646: /* Update A with merge */
647: stack[i-1].size += stack[i].size;
648: --i;
649: }
650: } else {
651: /* i > 2, i.e. C exists
652: A = stack[i-2], B = stack[i-1], C = stack[i]; */
653: if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654: if (stack[i-2].size < stack[i].size) {
655: /* merge B into A, but if A[-1] <= B[0] then already sorted */
656: if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
657: m = stack[i-1].start;
658: /* Search A for B[0] insertion */
659: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);
660: /* Search B for A[-1] insertion */
661: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);
662: if (m-l <= r-m) {
663: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
664: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
665: } else {
666: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
667: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
668: }
669: }
670: /* Update A with merge */
671: stack[i-2].size += stack[i-1].size;
672: /* Push C up the stack */
673: stack[i-1].start = stack[i].start;
674: stack[i-1].size = stack[i].size;
675: } else {
676: /* merge C into B */
677: mergeBC:
678: /* If B[-1] <= C[0] then... you know the drill */
679: if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
680: m = stack[i].start;
681: /* Search B for C[0] insertion */
682: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);
683: /* Search C for B[-1] insertion */
684: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);
685: if (m-l <= r-m) {
686: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
687: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
688: } else {
689: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
690: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
691: }
692: }
693: /* Update B with merge */
694: stack[i-1].size += stack[i].size;
695: }
696: --i;
697: } else if (stack[i-1].size <= stack[i].size) {
698: /* merge C into B */
699: goto mergeBC;
700: }
701: }
702: if (itemp == i) break;
703: }
704: *stacksize = i;
705: return(0);
706: }
708: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
709: {
710: PetscInt i = *stacksize;
714: while (i) {
715: PetscInt l, m, r, itemp = i;
717: if (i == 1) {
718: /* A = stack[i-1], B = stack[i] */
719: if (stack[i-1].size < stack[i].size) {
720: /* if A[-1] <= B[0] then sorted */
721: if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
722: m = stack[i].start;
723: /* Search A for B[0] insertion */
724: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);
725: /* Search B for A[-1] insertion */
726: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);
727: if (m-l <= r-m) {
728: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
729: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
730: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
731: } else {
732: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
733: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
734: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
735: }
736: }
737: /* Update A with merge */
738: stack[i-1].size += stack[i].size;
739: --i;
740: }
741: } else {
742: /* i > 2, i.e. C exists
743: A = stack[i-2], B = stack[i-1], C = stack[i]; */
744: if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745: if (stack[i-2].size < stack[i].size) {
746: /* merge B into A, but if A[-1] <= B[0] then already sorted */
747: if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
748: m = stack[i-1].start;
749: /* Search A for B[0] insertion */
750: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);
751: /* Search B for A[-1] insertion */
752: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);
753: if (m-l <= r-m) {
754: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
755: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
756: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
757: } else {
758: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
759: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
760: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
761: }
762: }
763: /* Update A with merge */
764: stack[i-2].size += stack[i-1].size;
765: /* Push C up the stack */
766: stack[i-1].start = stack[i].start;
767: stack[i-1].size = stack[i].size;
768: } else {
769: /* merge C into B */
770: mergeBC:
771: /* If B[-1] <= C[0] then... you know the drill */
772: if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
773: m = stack[i].start;
774: /* Search B for C[0] insertion */
775: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);
776: /* Search C for B[-1] insertion */
777: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);
778: if (m-l <= r-m) {
779: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
780: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
781: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
782: } else {
783: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
784: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
785: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
786: }
787: }
788: /* Update B with merge */
789: stack[i-1].size += stack[i].size;
790: }
791: --i;
792: } else if (stack[i-1].size <= stack[i].size) {
793: /* merge C into B */
794: goto mergeBC;
795: }
796: }
797: if (itemp == i) break;
798: }
799: *stacksize = i;
800: return(0);
801: }
803: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804: elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805: binary insertion sort or regulat insertion sort */
806: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
807: {
808: const PetscInt re = PetscMin(runstart+minrun, n-1);
809: PetscInt ri = runstart;
812: if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
813: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
815: ++ri;
816: while (ri < n-1) {
817: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
818: ++ri;
819: }
820: {
821: PetscInt lo = runstart, hi = ri;
822: for (; lo < hi; ++lo, --hi) {
823: COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824: }
825: }
826: } else {
827: ++ri;
828: while (ri < n-1) {
829: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
830: ++ri;
831: }
832: }
833: #if defined(PETSC_USE_DEBUG)
834: {
836: PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
837: }
838: #endif
839: if (ri < re) {
840: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841: binary search */
842: if (ri-runstart <= minrun >> 1) {
843: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844: PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845: } else {
846: PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
847: }
848: *runend = re;
849: } else *runend = ri;
850: return(0);
851: }
853: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
854: {
855: const PetscInt re = PetscMin(runstart+minrun, n-1);
856: PetscInt ri = runstart;
859: if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
860: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861: if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
862: ++ri;
863: while (ri < n-1) {
864: if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
865: ++ri;
866: }
867: {
868: PetscInt lo = runstart, hi = ri;
869: for (; lo < hi; ++lo, --hi) {
870: COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871: }
872: }
873: } else {
874: ++ri;
875: while (ri < n-1) {
876: if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
877: ++ri;
878: }
879: }
880: #if defined(PETSC_USE_DEBUG)
881: {
883: PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
884: }
885: #endif
886: if (ri < re) {
887: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888: binary search */
889: if (ri-runstart <= minrun >> 1) {
890: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891: PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
892: } else {
893: PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
894: }
895: *runend = re;
896: } else *runend = ri;
897: return(0);
898: }
900: /*@C
901: PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
903: Not Collective
905: Input Parameters:
906: + n - number of values
907: . arr - array to be sorted
908: . size - size in bytes of the datatype held in arr
909: . cmp - function pointer to comparison function
910: - ctx - optional context to be passed to comparison function, NULL if not needed
912: Output Parameters:
913: . arr - sorted array
915: Notes:
916: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
917: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
918: select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
919: do so it repeatedly triggers attempts throughout to merge adjacent runs.
921: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
922: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
923: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
924: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
925: likely all contain similar data.
927: Sample usage:
928: The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
929: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
930: may also
931: change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
932: the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
933: order
935: .vb
936: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
937: my_type l = *(my_type *) left, r = *(my_type *) right;
938: return (l < r) ? -1 : (l > r);
939: }
940: .ve
941: Note the context is unused here but you may use it to pass and subsequently access whatever information required
942: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
943: Then pass the function
944: .vb
945: PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
946: .ve
948: Fortran Notes:
949: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
950: returns result. For example
951: .vb
952: subroutine CompareIntegers(left,right,ctx,result)
953: implicit none
955: PetscInt,intent(in) :: left, right
956: type(UserCtx) :: ctx
957: integer,intent(out) :: result
959: if (left < right) then
960: result = -1
961: else if (left == right) then
962: result = 0
963: else
964: result = 1
965: end if
966: return
967: end subroutine CompareIntegers
968: .ve
970: References:
971: 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
973: Level: developer
975: .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
976: @*/
977: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
978: {
979: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
980: PetscTimSortStack runstack[128];
981: PetscTimSortBuffer buff;
982: PetscErrorCode ierr;
983: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
984: It is so unlikely that this limit is reached that this is __never__ checked for */
987: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
988: is a power of 2 or one plus a power of 2 */
989: {
990: PetscInt t = n, r = 0;
991: /* r becomes 1 if the least significant bits contain at least one off bit */
992: while (t >= 64) {
993: r |= t & 1;
994: t >>= 1;
995: }
996: minrun = t + r;
997: }
998: if (PetscDefined(USE_DEBUG)) {
999: PetscInfo1(NULL, "minrun = %D\n", minrun);
1000: if (n < 64) {
1001: PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);
1002: } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1003: }
1004: PetscMalloc1((size_t) minrun*size, &buff.ptr);
1005: buff.size = (size_t) minrun*size;
1006: buff.maxsize = (size_t) n*size;
1007: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1008: while (runstart < n) {
1009: /* Check if additional entries are at least partially ordered and build natural run */
1010: PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
1011: runstack[stacksize].start = runstart;
1012: runstack[stacksize].size = runend-runstart+1;
1013: PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
1014: ++stacksize;
1015: runstart = runend+1;
1016: }
1017: /* Have been inside while, so discard last stacksize++ */
1018: --stacksize;
1019: PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
1020: PetscFree(buff.ptr);
1021: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1022: return(0);
1023: }
1025: /*@C
1026: PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1027: reorders a second array to match the first. The arrays need not be the same type.
1029: Not Collective
1031: Input Parameters:
1032: + n - number of values
1033: . arr - array to be sorted
1034: . asize - size in bytes of the datatype held in arr
1035: . barr - array to be reordered
1036: . asize - size in bytes of the datatype held in barr
1037: . cmp - function pointer to comparison function
1038: - ctx - optional context to be passed to comparison function, NULL if not needed
1040: Output Parameters:
1041: + arr - sorted array
1042: - barr - reordered array
1044: Notes:
1045: The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1046: overlap.
1048: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1049: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1050: to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1051: arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1053: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1054: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1055: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1056: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1057: likely all contain similar data.
1059: Sample usage:
1060: The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1061: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1062: may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1063: guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1064: increasing order
1066: .vb
1067: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1068: my_type l = *(my_type *) left, r = *(my_type *) right;
1069: return (l < r) ? -1 : (l > r);
1070: }
1071: .ve
1072: Note the context is unused here but you may use it to pass and subsequently access whatever information required
1073: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1074: Then pass the function
1075: .vb
1076: PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1077: .ve
1080: Fortran Notes:
1081: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1082: returns result. For example
1083: .vb
1084: subroutine CompareIntegers(left,right,ctx,result)
1085: implicit none
1087: PetscInt,intent(in) :: left, right
1088: type(UserCtx) :: ctx
1089: integer,intent(out) :: result
1091: if (left < right) then
1092: result = -1
1093: else if (left == right) then
1094: result = 0
1095: else
1096: result = 1
1097: end if
1098: return
1099: end subroutine CompareIntegers
1100: .ve
1102: References:
1103: 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1105: Level: developer
1107: .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1108: @*/
1109: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1110: {
1111: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
1112: PetscTimSortStack runstack[128];
1113: PetscTimSortBuffer abuff, bbuff;
1114: PetscErrorCode ierr;
1115: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1116: It is so unlikely that this limit is reached that this is __never__ checked for */
1119: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1120: is a power of 2 or one plus a power of 2 */
1121: {
1122: PetscInt t = n, r = 0;
1123: /* r becomes 1 if the least significant bits contain at least one off bit */
1124: while (t >= 64) {
1125: r |= t & 1;
1126: t >>= 1;
1127: }
1128: minrun = t + r;
1129: }
1130: if (PetscDefined(USE_DEBUG)) {
1131: PetscInfo1(NULL, "minrun = %D\n", minrun);
1132: if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1133: }
1134: PetscMalloc1((size_t) minrun*asize, &abuff.ptr);
1135: abuff.size = (size_t) minrun*asize;
1136: abuff.maxsize = (size_t) n*asize;
1137: PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);
1138: bbuff.size = (size_t) minrun*bsize;
1139: bbuff.maxsize = (size_t) n*bsize;
1140: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1141: while (runstart < n) {
1142: /* Check if additional entries are at least partially ordered and build natural run */
1143: PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1144: runstack[stacksize].start = runstart;
1145: runstack[stacksize].size = runend-runstart+1;
1146: PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1147: ++stacksize;
1148: runstart = runend+1;
1149: }
1150: /* Have been inside while, so discard last stacksize++ */
1151: --stacksize;
1152: PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1153: PetscFree(abuff.ptr);
1154: PetscFree(bbuff.ptr);
1155: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1156: return(0);
1157: }
1159: /*@
1160: PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1162: Not Collective
1164: Input Parameters:
1165: + n - number of values
1166: - arr - array of integers
1168: Output Parameters:
1169: . arr - sorted array of integers
1171: Notes:
1172: If the array is less than 64 entries long PetscSortInt() is automatically used.
1174: This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1175: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1176: recomended that the user benchmark their code to see which routine is fastest.
1178: Level: intermediate
1180: .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1181: @*/
1182: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1183: {
1186: if (n <= 1) return(0);
1188: if (n < 64) {
1189: PetscSortInt(n, arr);
1190: } else {
1191: PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1192: }
1193: return(0);
1194: }
1196: /*@
1197: PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1198: array to match the first.
1200: Not Collective
1202: Input Parameters:
1203: + n - number of values
1204: . arr1 - array of integers to be sorted
1205: - arr2 - array of integers to be reordered
1207: Output Parameters:
1208: + arr1 - sorted array of integers
1209: - arr2 - reordered array of integers
1211: Notes:
1212: The arrays CANNOT overlap.
1214: This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1215: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1216: recomended that the user benchmark their code to see which routine is fastest.
1218: Level: intermediate
1220: .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1221: @*/
1222: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1223: {
1226: if (n <= 1) return(0);
1229: /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1230: PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1231: return(0);
1232: }
1234: /*@
1235: PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1237: Not Collective
1239: Input Parameters:
1240: + n - number of values
1241: - arr - array of PetscMPIInts
1243: Output Parameters:
1244: . arr - sorted array of integers
1246: Notes:
1247: If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1249: This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1250: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1251: recomended that the user benchmark their code to see which routine is fastest.
1253: Level: intermediate
1255: .seealso: PetscTimSort(), PetscSortMPIInt()
1256: @*/
1257: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1258: {
1259: PetscErrorCode ierr;
1262: if (n <= 1) return(0);
1264: if (n < 64) {
1265: PetscSortMPIInt(n, arr);
1266: } else {
1267: PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1268: }
1269: return(0);
1270: }
1272: /*@
1273: PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1274: array to match the first.
1276: Not Collective
1278: Input Parameters:
1279: + n - number of values
1280: . arr1 - array of integers to be sorted
1281: - arr2 - array of integers to be reordered
1283: Output Parameters:
1284: + arr1 - sorted array of integers
1285: - arr2 - reordered array of integers
1287: Notes:
1288: The arrays CANNOT overlap.
1290: This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1291: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1292: recomended that the user benchmark their code to see which routine is fastest.
1294: Level: intermediate
1296: .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1297: @*/
1298: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1299: {
1302: if (n <= 1) return(0);
1305: /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1306: PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1307: return(0);
1308: }
1310: /*@
1311: PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1313: Not Collective
1315: Input Parameters:
1316: + n - number of values
1317: - arr - array of PetscReals
1319: Output Parameters:
1320: . arr - sorted array of integers
1322: Notes:
1323: If the array is less than 64 entries long PetscSortReal() is automatically used.
1325: This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1326: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1327: recomended that the user benchmark their code to see which routine is fastest.
1329: Level: intermediate
1331: .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1332: @*/
1333: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1334: {
1335: PetscErrorCode ierr;
1338: if (n <= 1) return(0);
1340: if (n < 64) {
1341: PetscSortReal(n, arr);
1342: } else {
1343: PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1344: }
1345: return(0);
1346: }
1348: /*@
1349: PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1350: array of PetscInts to match the first.
1352: Not Collective
1354: Input Parameters:
1355: + n - number of values
1356: . arr1 - array of PetscReals to be sorted
1357: - arr2 - array of PetscReals to be reordered
1359: Output Parameters:
1360: + arr1 - sorted array of PetscReals
1361: - arr2 - reordered array of PetscInts
1363: Notes:
1364: This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1365: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1366: recomended that the user benchmark their code to see which routine is fastest.
1368: Level: intermediate
1370: .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1371: @*/
1372: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1373: {
1376: if (n <= 1) return(0);
1379: /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1380: PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1381: return(0);
1382: }