Actual source code: sortso.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5: {
6: PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7: return (l < r) ? -1 : (l > r);
8: }
10: PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11: {
12: PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13: return (l < r) ? -1 : (l > r);
14: }
16: PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17: {
18: PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19: return (l < r) ? -1 : (l > r);
20: }
22: #define MIN_GALLOP_CONST_GLOBAL 8
23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24: typedef int (*CompFunc)(const void *, const void *, void *);
26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27: #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29: {
30: __builtin_memcpy(t, b, size);
31: __builtin_memmove(b, a, size);
32: __builtin_memcpy(a, t, size);
33: return;
34: }
36: PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37: {
38: __builtin_memcpy(t, ar, asize);
39: __builtin_memmove(ar, al, asize);
40: __builtin_memcpy(al, t, asize);
41: __builtin_memcpy(t, br, bsize);
42: __builtin_memmove(br, bl, bsize);
43: __builtin_memcpy(bl, t, bsize);
44: return;
45: }
47: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48: {
49: __builtin_memcpy(dest, src, size);
50: return;
51: }
53: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54: {
55: __builtin_memcpy(adest, asrc, asize);
56: __builtin_memcpy(bdest, bsrc, bsize);
57: return;
58: }
60: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61: {
62: __builtin_memmove(dest, src, size);
63: return;
64: }
66: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67: {
68: __builtin_memmove(adest, asrc, asize);
69: __builtin_memmove(bdest, bsrc, bsize);
70: return;
71: }
72: # else
73: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74: {
77: PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78: PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79: PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80: PetscFunctionReturnVoid();
81: }
83: PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
84: {
87: PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88: PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89: PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90: PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91: PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92: PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93: PetscFunctionReturnVoid();
94: }
96: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97: {
100: PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101: PetscFunctionReturnVoid();
102: }
104: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105: {
108: PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109: PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110: PetscFunctionReturnVoid();
111: }
113: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114: {
117: PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118: PetscFunctionReturnVoid();
119: }
121: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122: {
125: PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126: PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127: PetscFunctionReturnVoid();
128: }
129: #endif
131: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132: x. Output also inclusive.
134: NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
136: {0,1,2,3,4,5}
138: when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139: */
140: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141: {
142: PetscInt last = l, k = 1, mid, cur = l+1;
145: *m = l;
146: if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147: if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; return(0);}
148: if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) return(0);
149: while (PETSC_TRUE) {
150: if (PetscUnlikely(cur > r)) {cur = r; break;}
151: if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152: last = cur;
153: cur += (k <<= 1) + 1;
154: ++k;
155: }
156: /* standard binary search but take last 0 mid 0 cur 1 into account*/
157: while (cur > last + 1) {
158: mid = last + ((cur - last) >> 1);
159: if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160: cur = mid;
161: } else {
162: last = mid;
163: }
164: }
165: *m = cur;
166: return(0);
167: }
169: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170: < x. Output also inclusive */
171: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172: {
173: PetscInt last = r, k = 1, mid, cur = r-1;
176: *m = r;
177: if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178: if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; return(0);}
179: if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) return(0);
180: while (PETSC_TRUE) {
181: if (PetscUnlikely(cur < l)) {cur = l; break;}
182: if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183: last = cur;
184: cur -= (k <<= 1) + 1;
185: ++k;
186: }
187: /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188: while (last > cur + 1) {
189: mid = last - ((last - cur) >> 1);
190: if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191: cur = mid;
192: } else {
193: last = mid;
194: }
195: }
196: *m = cur;
197: return(0);
198: }
200: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201: complete array, left is first index of left array, mid is first index of right array, right is last index of right
202: array */
203: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204: {
205: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206: const PetscInt llen = mid-left;
210: Petsc_memcpy(tarr, arr+(left*size), llen*size);
211: while ((i < llen) && (j <= right)) {
212: if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213: Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214: ++k;
215: gallopright = 0;
216: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217: PetscInt l1, l2, diff1, diff2;
218: do {
219: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220: /* search temp for right[j], can move up to that of temp into arr immediately */
221: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
222: diff1 = l1-i;
223: Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224: k += diff1;
225: i = l1;
226: /* search right for temp[i], can move up to that many of right into arr */
227: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
228: diff2 = l2-j;
229: Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230: k += diff2;
231: j = l2;
232: if (i >= llen || j > right) break;
233: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234: ++MIN_GALLOP_GLOBAL;
235: }
236: } else {
237: Petsc_memmove(arr+(k*size), arr+(j*size), size);
238: ++k;
239: gallopleft = 0;
240: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241: PetscInt l1, l2, diff1, diff2;
242: do {
243: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244: /* search right for temp[i], can move up to that many of right into arr */
245: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
246: diff2 = l2-j;
247: Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248: k += diff2;
249: j = l2;
250: /* search temp for right[j], can copy up to that of temp into arr immediately */
251: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
252: diff1 = l1-i;
253: Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254: k += diff1;
255: i = l1;
256: if (i >= llen || j > right) break;
257: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258: ++MIN_GALLOP_GLOBAL;
259: }
260: }
261: }
262: if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263: return(0);
264: }
266: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267: {
268: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269: const PetscInt llen = mid-left;
273: Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274: while ((i < llen) && (j <= right)) {
275: if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277: ++k;
278: gallopright = 0;
279: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280: PetscInt l1, l2, diff1, diff2;
281: do {
282: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283: /* search temp for right[j], can move up to that of temp into arr immediately */
284: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
285: diff1 = l1-i;
286: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287: k += diff1;
288: i = l1;
289: /* search right for temp[i], can move up to that many of right into arr */
290: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
291: diff2 = l2-j;
292: Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293: k += diff2;
294: j = l2;
295: if (i >= llen || j > right) break;
296: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297: ++MIN_GALLOP_GLOBAL;
298: }
299: } else {
300: Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301: ++k;
302: gallopleft = 0;
303: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304: PetscInt l1, l2, diff1, diff2;
305: do {
306: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307: /* search right for temp[i], can move up to that many of right into arr */
308: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
309: diff2 = l2-j;
310: Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311: k += diff2;
312: j = l2;
313: /* search temp for right[j], can copy up to that of temp into arr immediately */
314: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
315: diff1 = l1-i;
316: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317: k += diff1;
318: i = l1;
319: if (i >= llen || j > right) break;
320: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321: ++MIN_GALLOP_GLOBAL;
322: }
323: }
324: }
325: if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326: return(0);
327: }
329: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330: complete array, left is first index of left array, mid is first index of right array, right is last index of right
331: array */
332: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333: {
334: PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335: const PetscInt rlen = right-mid+1;
339: Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340: while ((i >= 0) && (j >= left)) {
341: if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342: Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343: --k;
344: gallopleft = 0;
345: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346: PetscInt l1, l2, diff1, diff2;
347: do {
348: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349: /* search temp for left[j], can copy up to that many of temp into arr */
350: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
351: diff1 = i-l1;
352: Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353: k -= diff1;
354: i = l1;
355: /* search left for temp[i], can move up to that many of left up arr */
356: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
357: diff2 = j-l2;
358: Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359: k -= diff2;
360: j = l2;
361: if (i < 0 || j < left) break;
362: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363: ++MIN_GALLOP_GLOBAL;
364: }
365: } else {
366: Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367: --k;
368: gallopright = 0;
369: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370: PetscInt l1, l2, diff1, diff2;
371: do {
372: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373: /* search left for temp[i], can move up to that many of left up arr */
374: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
375: diff2 = j-l2;
376: Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377: k -= diff2;
378: j = l2;
379: /* search temp for left[j], can copy up to that many of temp into arr */
380: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
381: diff1 = i-l1;
382: Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383: k -= diff1;
384: i = l1;
385: if (i < 0 || j < left) break;
386: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387: ++MIN_GALLOP_GLOBAL;
388: }
389: }
390: }
391: if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392: return(0);
393: }
395: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396: {
397: PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398: const PetscInt rlen = right-mid+1;
402: Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403: while ((i >= 0) && (j >= left)) {
404: if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405: Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406: --k;
407: gallopleft = 0;
408: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409: PetscInt l1, l2, diff1, diff2;
410: do {
411: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412: /* search temp for left[j], can copy up to that many of temp into arr */
413: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
414: diff1 = i-l1;
415: Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416: k -= diff1;
417: i = l1;
418: /* search left for temp[i], can move up to that many of left up arr */
419: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
420: diff2 = j-l2;
421: Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422: k -= diff2;
423: j = l2;
424: if (i < 0 || j < left) break;
425: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426: ++MIN_GALLOP_GLOBAL;
427: }
428: } else {
429: Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430: --k;
431: gallopright = 0;
432: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433: PetscInt l1, l2, diff1, diff2;
434: do {
435: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436: /* search left for temp[i], can move up to that many of left up arr */
437: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
438: diff2 = j-l2;
439: Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440: k -= diff2;
441: j = l2;
442: /* search temp for left[j], can copy up to that many of temp into arr */
443: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
444: diff1 = i-l1;
445: Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446: k -= diff1;
447: i = l1;
448: if (i < 0 || j < left) break;
449: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450: ++MIN_GALLOP_GLOBAL;
451: }
452: }
453: }
454: if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455: return(0);
456: }
458: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459: bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461: {
462: PetscInt i = start == left ? start+1 : start;
465: for (; i <= right; ++i) {
466: PetscInt j = i-1;
467: Petsc_memcpy(tarr, arr+(i*size), size);
468: while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469: COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470: --j;
471: }
472: Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473: }
474: return(0);
475: }
477: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478: {
479: PetscInt i = start == left ? start+1 : start;
482: for (; i <= right; ++i) {
483: PetscInt j = i-1;
484: Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485: while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486: COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487: --j;
488: }
489: Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490: }
491: return(0);
492: }
494: /* See PetscInsertionSort_Private */
495: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496: {
497: PetscInt i = start == left ? start+1 : start;
500: for (; i <= right; ++i) {
501: PetscInt l = left, r = i;
502: Petsc_memcpy(tarr, arr+(i*size), size);
503: do {
504: const PetscInt m = l + ((r - l) >> 1);
505: if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506: r = m;
507: } else {
508: l = m + 1;
509: }
510: } while (l < r);
511: Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512: Petsc_memcpy(arr+(l*size), tarr, size);
513: }
514: return(0);
515: }
517: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518: {
519: PetscInt i = start == left ? start+1 : start;
522: for (; i <= right; ++i) {
523: PetscInt l = left, r = i;
524: Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525: do {
526: const PetscInt m = l + ((r - l) >> 1);
527: if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528: r = m;
529: } else {
530: l = m + 1;
531: }
532: } while (l < r);
533: Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534: Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535: }
536: return(0);
537: }
539: typedef struct {
540: PetscInt size;
541: PetscInt start;
542: #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
543: } PetscTimSortStack;
544: #else
545: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
546: #endif
548: typedef struct {
549: char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
550: size_t size;
551: size_t maxsize;
552: } PetscTimSortBuffer;
554: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
555: {
557: if (PetscLikely(newSize <= buff->size)) return(0);
558: {
559: /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
561: size_t newMax = PetscMin(newSize*newSize, buff->maxsize);
562: PetscFree(buff->ptr);
563: PetscMalloc1(newMax, &buff->ptr);
564: buff->size = newMax;
565: }
566: return(0);
567: }
569: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
570: {
572: for (;stacksize; --stacksize) {
573: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
574: if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
575: PetscInt l, m = stack[stacksize].start, r;
577: /* Search A for B[0] insertion */
578: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);
579: /* Search B for A[-1] insertion */
580: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);
581: if (m-l <= r-m) {
582: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
583: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
584: } else {
585: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
586: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
587: }
588: }
589: /* Update A with merge */
590: stack[stacksize-1].size += stack[stacksize].size;
591: }
592: return(0);
593: }
595: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
596: {
598: for (;stacksize; --stacksize) {
599: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
600: if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
601: PetscInt l, m = stack[stacksize].start, r;
603: /* Search A for B[0] insertion */
604: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);
605: /* Search B for A[-1] insertion */
606: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);
607: if (m-l <= r-m) {
608: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
609: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
610: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
611: } else {
612: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
613: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
614: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
615: }
616: }
617: /* Update A with merge */
618: stack[stacksize-1].size += stack[stacksize].size;
619: }
620: return(0);
621: }
623: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
624: {
625: PetscInt i = *stacksize;
629: while (i) {
630: PetscInt l, m, r, itemp = i;
632: if (i == 1) {
633: /* A = stack[i-1], B = stack[i] */
634: if (stack[i-1].size < stack[i].size) {
635: /* if A[-1] <= B[0] then sorted */
636: if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
637: m = stack[i].start;
638: /* Search A for B[0] insertion */
639: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);
640: /* Search B for A[-1] insertion */
641: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);
642: if (m-l <= r-m) {
643: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
644: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
645: } else {
646: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
647: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
648: }
649: }
650: /* Update A with merge */
651: stack[i-1].size += stack[i].size;
652: --i;
653: }
654: } else {
655: /* i > 2, i.e. C exists
656: A = stack[i-2], B = stack[i-1], C = stack[i]; */
657: if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
658: if (stack[i-2].size < stack[i].size) {
659: /* merge B into A, but if A[-1] <= B[0] then already sorted */
660: if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
661: m = stack[i-1].start;
662: /* Search A for B[0] insertion */
663: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);
664: /* Search B for A[-1] insertion */
665: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);
666: if (m-l <= r-m) {
667: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
668: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
669: } else {
670: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
671: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
672: }
673: }
674: /* Update A with merge */
675: stack[i-2].size += stack[i-1].size;
676: /* Push C up the stack */
677: stack[i-1].start = stack[i].start;
678: stack[i-1].size = stack[i].size;
679: } else {
680: /* merge C into B */
681: mergeBC:
682: /* If B[-1] <= C[0] then... you know the drill */
683: if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
684: m = stack[i].start;
685: /* Search B for C[0] insertion */
686: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);
687: /* Search C for B[-1] insertion */
688: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);
689: if (m-l <= r-m) {
690: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
691: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
692: } else {
693: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
694: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
695: }
696: }
697: /* Update B with merge */
698: stack[i-1].size += stack[i].size;
699: }
700: --i;
701: } else if (stack[i-1].size <= stack[i].size) {
702: /* merge C into B */
703: goto mergeBC;
704: }
705: }
706: if (itemp == i) break;
707: }
708: *stacksize = i;
709: return(0);
710: }
712: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
713: {
714: PetscInt i = *stacksize;
718: while (i) {
719: PetscInt l, m, r, itemp = i;
721: if (i == 1) {
722: /* A = stack[i-1], B = stack[i] */
723: if (stack[i-1].size < stack[i].size) {
724: /* if A[-1] <= B[0] then sorted */
725: if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
726: m = stack[i].start;
727: /* Search A for B[0] insertion */
728: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);
729: /* Search B for A[-1] insertion */
730: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);
731: if (m-l <= r-m) {
732: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
733: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
734: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
735: } else {
736: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
737: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
738: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
739: }
740: }
741: /* Update A with merge */
742: stack[i-1].size += stack[i].size;
743: --i;
744: }
745: } else {
746: /* i > 2, i.e. C exists
747: A = stack[i-2], B = stack[i-1], C = stack[i]; */
748: if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
749: if (stack[i-2].size < stack[i].size) {
750: /* merge B into A, but if A[-1] <= B[0] then already sorted */
751: if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
752: m = stack[i-1].start;
753: /* Search A for B[0] insertion */
754: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);
755: /* Search B for A[-1] insertion */
756: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);
757: if (m-l <= r-m) {
758: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
759: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
760: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
761: } else {
762: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
763: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
764: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
765: }
766: }
767: /* Update A with merge */
768: stack[i-2].size += stack[i-1].size;
769: /* Push C up the stack */
770: stack[i-1].start = stack[i].start;
771: stack[i-1].size = stack[i].size;
772: } else {
773: /* merge C into B */
774: mergeBC:
775: /* If B[-1] <= C[0] then... you know the drill */
776: if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
777: m = stack[i].start;
778: /* Search B for C[0] insertion */
779: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);
780: /* Search C for B[-1] insertion */
781: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);
782: if (m-l <= r-m) {
783: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
784: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
785: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
786: } else {
787: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
788: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
789: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
790: }
791: }
792: /* Update B with merge */
793: stack[i-1].size += stack[i].size;
794: }
795: --i;
796: } else if (stack[i-1].size <= stack[i].size) {
797: /* merge C into B */
798: goto mergeBC;
799: }
800: }
801: if (itemp == i) break;
802: }
803: *stacksize = i;
804: return(0);
805: }
807: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
808: elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
809: binary insertion sort or regulat insertion sort */
810: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
811: {
812: const PetscInt re = PetscMin(runstart+minrun, n-1);
813: PetscInt ri = runstart;
816: if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
817: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
818: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
819: ++ri;
820: while (ri < n-1) {
821: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
822: ++ri;
823: }
824: {
825: PetscInt lo = runstart, hi = ri;
826: for (; lo < hi; ++lo, --hi) {
827: COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
828: }
829: }
830: } else {
831: ++ri;
832: while (ri < n-1) {
833: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
834: ++ri;
835: }
836: }
837: #if defined(PETSC_USE_DEBUG)
838: {
840: PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
841: }
842: #endif
843: if (ri < re) {
844: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
845: binary search */
846: if (ri-runstart <= minrun >> 1) {
847: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
848: PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
849: } else {
850: PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
851: }
852: *runend = re;
853: } else *runend = ri;
854: return(0);
855: }
857: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
858: {
859: const PetscInt re = PetscMin(runstart+minrun, n-1);
860: PetscInt ri = runstart;
863: if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
864: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
865: if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
866: ++ri;
867: while (ri < n-1) {
868: if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
869: ++ri;
870: }
871: {
872: PetscInt lo = runstart, hi = ri;
873: for (; lo < hi; ++lo, --hi) {
874: COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
875: }
876: }
877: } else {
878: ++ri;
879: while (ri < n-1) {
880: if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
881: ++ri;
882: }
883: }
884: #if defined(PETSC_USE_DEBUG)
885: {
887: PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
888: }
889: #endif
890: if (ri < re) {
891: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
892: binary search */
893: if (ri-runstart <= minrun >> 1) {
894: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
895: PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
896: } else {
897: PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
898: }
899: *runend = re;
900: } else *runend = ri;
901: return(0);
902: }
904: /*@C
905: PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
907: Not Collective
909: Input Parameters:
910: + n - number of values
911: . arr - array to be sorted
912: . size - size in bytes of the datatype held in arr
913: . cmp - function pointer to comparison function
914: - ctx - optional context to be passed to comparison function, NULL if not needed
916: Output Parameters:
917: . arr - sorted array
919: Notes:
920: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
921: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
922: select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
923: do so it repeatedly triggers attempts throughout to merge adjacent runs.
925: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
926: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
927: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
928: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
929: likely all contain similar data.
931: Sample usage:
932: The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
933: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
934: may also
935: change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
936: the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
937: order
939: .vb
940: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
941: my_type l = *(my_type *) left, r = *(my_type *) right;
942: return (l < r) ? -1 : (l > r);
943: }
944: .ve
945: Note the context is unused here but you may use it to pass and subsequently access whatever information required
946: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
947: Then pass the function
948: .vb
949: PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
950: .ve
952: Fortran Notes:
953: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
954: returns result. For example
955: .vb
956: subroutine CompareIntegers(left,right,ctx,result)
957: implicit none
959: PetscInt,intent(in) :: left, right
960: type(UserCtx) :: ctx
961: integer,intent(out) :: result
963: if (left < right) then
964: result = -1
965: else if (left == right) then
966: result = 0
967: else
968: result = 1
969: end if
970: return
971: end subroutine CompareIntegers
972: .ve
974: References:
975: 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
977: Level: developer
979: .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
980: @*/
981: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
982: {
983: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
984: PetscTimSortStack runstack[128];
985: PetscTimSortBuffer buff;
986: PetscErrorCode ierr;
987: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
988: It is so unlikely that this limit is reached that this is __never__ checked for */
991: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
992: is a power of 2 or one plus a power of 2 */
993: {
994: PetscInt t = n, r = 0;
995: /* r becomes 1 if the least significant bits contain at least one off bit */
996: while (t >= 64) {
997: r |= t & 1;
998: t >>= 1;
999: }
1000: minrun = t + r;
1001: }
1002: if (PetscDefined(USE_DEBUG)) {
1003: PetscInfo1(NULL, "minrun = %D\n", minrun);
1004: if (n < 64) {
1005: PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);
1006: } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1007: }
1008: PetscMalloc1((size_t) minrun*size, &buff.ptr);
1009: buff.size = (size_t) minrun*size;
1010: buff.maxsize = (size_t) n*size;
1011: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1012: while (runstart < n) {
1013: /* Check if additional entries are at least partially ordered and build natural run */
1014: PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
1015: runstack[stacksize].start = runstart;
1016: runstack[stacksize].size = runend-runstart+1;
1017: PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
1018: ++stacksize;
1019: runstart = runend+1;
1020: }
1021: /* Have been inside while, so discard last stacksize++ */
1022: --stacksize;
1023: PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
1024: PetscFree(buff.ptr);
1025: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1026: return(0);
1027: }
1029: /*@C
1030: PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1031: reorders a second array to match the first. The arrays need not be the same type.
1033: Not Collective
1035: Input Parameters:
1036: + n - number of values
1037: . asize - size in bytes of the datatype held in arr
1038: . bsize - size in bytes of the datatype held in barr
1039: . cmp - function pointer to comparison function
1040: - ctx - optional context to be passed to comparison function, NULL if not needed
1042: Input/Output Parameters:
1043: + arr - array to be sorted, on output it is sorted
1044: - barr - array to be reordered, on output it is reordered
1046: Notes:
1047: The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1048: overlap.
1050: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1051: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1052: to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1053: arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1055: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1056: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1057: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1058: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1059: likely all contain similar data.
1061: Sample usage:
1062: The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1063: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1064: may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1065: guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1066: increasing order
1068: .vb
1069: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1070: my_type l = *(my_type *) left, r = *(my_type *) right;
1071: return (l < r) ? -1 : (l > r);
1072: }
1073: .ve
1074: Note the context is unused here but you may use it to pass and subsequently access whatever information required
1075: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1076: Then pass the function
1077: .vb
1078: PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1079: .ve
1081: Fortran Notes:
1082: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1083: returns result. For example
1084: .vb
1085: subroutine CompareIntegers(left,right,ctx,result)
1086: implicit none
1088: PetscInt,intent(in) :: left, right
1089: type(UserCtx) :: ctx
1090: integer,intent(out) :: result
1092: if (left < right) then
1093: result = -1
1094: else if (left == right) then
1095: result = 0
1096: else
1097: result = 1
1098: end if
1099: return
1100: end subroutine CompareIntegers
1101: .ve
1103: References:
1104: 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1106: Level: developer
1108: .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1109: @*/
1110: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1111: {
1112: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
1113: PetscTimSortStack runstack[128];
1114: PetscTimSortBuffer abuff, bbuff;
1115: PetscErrorCode ierr;
1116: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1117: It is so unlikely that this limit is reached that this is __never__ checked for */
1120: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1121: is a power of 2 or one plus a power of 2 */
1122: {
1123: PetscInt t = n, r = 0;
1124: /* r becomes 1 if the least significant bits contain at least one off bit */
1125: while (t >= 64) {
1126: r |= t & 1;
1127: t >>= 1;
1128: }
1129: minrun = t + r;
1130: }
1131: if (PetscDefined(USE_DEBUG)) {
1132: PetscInfo1(NULL, "minrun = %D\n", minrun);
1133: if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1134: }
1135: PetscMalloc1((size_t) minrun*asize, &abuff.ptr);
1136: abuff.size = (size_t) minrun*asize;
1137: abuff.maxsize = (size_t) n*asize;
1138: PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);
1139: bbuff.size = (size_t) minrun*bsize;
1140: bbuff.maxsize = (size_t) n*bsize;
1141: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1142: while (runstart < n) {
1143: /* Check if additional entries are at least partially ordered and build natural run */
1144: PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1145: runstack[stacksize].start = runstart;
1146: runstack[stacksize].size = runend-runstart+1;
1147: PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1148: ++stacksize;
1149: runstart = runend+1;
1150: }
1151: /* Have been inside while, so discard last stacksize++ */
1152: --stacksize;
1153: PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1154: PetscFree(abuff.ptr);
1155: PetscFree(bbuff.ptr);
1156: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1157: return(0);
1158: }
1160: /*@
1161: PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1163: Not Collective
1165: Input Parameters:
1166: + n - number of values
1167: - arr - array of integers
1169: Output Parameters:
1170: . arr - sorted array of integers
1172: Notes:
1173: If the array is less than 64 entries long PetscSortInt() is automatically used.
1175: This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1176: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1177: recommended that the user benchmark their code to see which routine is fastest.
1179: Level: intermediate
1181: .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1182: @*/
1183: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1184: {
1187: if (n <= 1) return(0);
1189: if (n < 64) {
1190: PetscSortInt(n, arr);
1191: } else {
1192: PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1193: }
1194: return(0);
1195: }
1197: /*@
1198: PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1199: array to match the first.
1201: Not Collective
1203: Input Parameter:
1204: . n - number of values
1206: Input/Output Parameters:
1207: + arr1 - array of integers to be sorted, modified on output
1208: - arr2 - array of integers to be reordered, modified on output
1210: Notes:
1211: The arrays CANNOT overlap.
1213: This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1214: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1215: recommended that the user benchmark their code to see which routine is fastest.
1217: Level: intermediate
1219: .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1220: @*/
1221: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1222: {
1225: if (n <= 1) return(0);
1228: /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1229: PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1230: return(0);
1231: }
1233: /*@
1234: PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1236: Not Collective
1238: Input Parameters:
1239: + n - number of values
1240: - arr - array of PetscMPIInts
1242: Output Parameters:
1243: . arr - sorted array of integers
1245: Notes:
1246: If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1248: This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1249: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1250: recommended that the user benchmark their code to see which routine is fastest.
1252: Level: intermediate
1254: .seealso: PetscTimSort(), PetscSortMPIInt()
1255: @*/
1256: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1257: {
1258: PetscErrorCode ierr;
1261: if (n <= 1) return(0);
1263: if (n < 64) {
1264: PetscSortMPIInt(n, arr);
1265: } else {
1266: PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1267: }
1268: return(0);
1269: }
1271: /*@
1272: PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1273: array to match the first.
1275: Not Collective
1277: Input Parameter:
1278: . n - number of values
1280: Input/Output Parameters:
1281: . arr1 - array of integers to be sorted, modified on output
1282: - arr2 - array of integers to be reordered, modified on output
1284: Notes:
1285: The arrays CANNOT overlap.
1287: This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1288: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1289: recommended that the user benchmark their code to see which routine is fastest.
1291: Level: intermediate
1293: .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1294: @*/
1295: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1296: {
1299: if (n <= 1) return(0);
1302: /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1303: PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1304: return(0);
1305: }
1307: /*@
1308: PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1310: Not Collective
1312: Input Parameters:
1313: + n - number of values
1314: - arr - array of PetscReals
1316: Output Parameters:
1317: . arr - sorted array of integers
1319: Notes:
1320: If the array is less than 64 entries long PetscSortReal() is automatically used.
1322: This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1323: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1324: recommended that the user benchmark their code to see which routine is fastest.
1326: Level: intermediate
1328: .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1329: @*/
1330: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1331: {
1332: PetscErrorCode ierr;
1335: if (n <= 1) return(0);
1337: if (n < 64) {
1338: PetscSortReal(n, arr);
1339: } else {
1340: PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1341: }
1342: return(0);
1343: }
1345: /*@
1346: PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1347: array of PetscInts to match the first.
1349: Not Collective
1351: Input Parameter:
1352: . n - number of values
1354: Input/Output Parameters:
1355: . arr1 - array of PetscReals to be sorted, modified on output
1356: - arr2 - array of PetscReals to be reordered, modified on output
1358: Notes:
1359: This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1360: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1361: recommended that the user benchmark their code to see which routine is fastest.
1363: Level: intermediate
1365: .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1366: @*/
1367: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1368: {
1371: if (n <= 1) return(0);
1374: /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1375: PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1376: return(0);
1377: }