1: #include <petscsys.h> 2: #include <petsc/private/petscimpl.h> 4: PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 5: {
6: PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7: return (l < r) ? -1 : (l > r);
8: }
10: PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 11: {
12: PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13: return (l < r) ? -1 : (l > r);
14: }
16: PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 17: {
18: PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19: return (l < r) ? -1 : (l > r);
20: }
22: #define MIN_GALLOP_CONST_GLOBAL 8 23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24: typedef int (*CompFunc)(const void *, const void *, void *);
26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27: #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size) 29: {
30: __builtin_memcpy(t, b, size);
31: __builtin_memmove(b, a, size);
32: __builtin_memcpy(a, t, size);
33: return;
34: }
36: PETSC_STATIC_INLINE void COPYSWAPPY2(char *aa, char *ab, size_t asize, char *ba, char *bb, size_t bsize, char *t) 37: {
38: __builtin_memcpy(t, ab, asize);
39: __builtin_memmove(ab, aa, asize);
40: __builtin_memcpy(aa, t, asize);
41: __builtin_memcpy(t, bb, bsize);
42: __builtin_memmove(bb, ba, bsize);
43: __builtin_memcpy(ba, t, bsize);
44: return;
45: }
47: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size) 48: {
49: __builtin_memcpy(dest, src, size);
50: return;
51: }
53: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 54: {
55: __builtin_memcpy(adest, asrc, asize);
56: __builtin_memcpy(bdest, asrc, bsize);
57: return;
58: }
60: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size) 61: {
62: __builtin_memmove(dest, src, size);
63: return;
64: }
66: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 67: {
68: __builtin_memmove(adest, asrc, asize);
69: __builtin_memmove(bdest, bsrc, bsize);
70: return;
71: }
72: # else
73: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size) 74: {
77: PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78: PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79: PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80: PetscFunctionReturnVoid();
81: }
83: PETSC_STATIC_INLINE void COPYSWAPPY2(char *aa, char *ab, size_t asize, char *ba, char *bb, size_t bsize, char *t) 84: {
87: PetscMemcpy(t, ab, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88: PetscMemmove(ab, aa, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89: PetscMemcpy(aa, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90: PetscMemcpy(t, bb, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91: PetscMemmove(bb, ba, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92: PetscMemcpy(ba, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93: PetscFunctionReturnVoid();
94: }
96: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size) 97: {
100: PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101: PetscFunctionReturnVoid();
102: }
104: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)105: {
108: PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109: PetscMemcpy(bdest, asrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110: PetscFunctionReturnVoid();
111: }
113: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)114: {
117: PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118: PetscFunctionReturnVoid();
119: }
121: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)122: {
125: PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126: PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127: PetscFunctionReturnVoid();
128: }
129: #endif
131: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132: x. Output also inclusive.
134: NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
136: {0,1,2,3,4,5}
138: when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139: */
140: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)141: {
142: PetscInt last = l, k = 1, mid, cur = l+1;
145: *m = l;
146: if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147: if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; return(0);}
148: if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) return(0);
149: while (PETSC_TRUE) {
150: if (PetscUnlikely(cur > r)) {cur = r; break;}
151: if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152: last = cur;
153: cur += (k <<= 1) + 1;
154: ++k;
155: }
156: /* standard binary search but take last 0 mid 0 cur 1 into account*/
157: while (cur > last + 1) {
158: mid = last + ((cur - last) >> 1);
159: if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160: cur = mid;
161: } else {
162: last = mid;
163: }
164: }
165: *m = cur;
166: return(0);
167: }
169: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170: < x. Output also inclusive */
171: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)172: {
173: PetscInt last = r, k = 1, mid, cur = r-1;
176: *m = r;
177: if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178: if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; return(0);}
179: if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) return(0);
180: while (PETSC_TRUE) {
181: if (PetscUnlikely(cur < l)) {cur = l; break;}
182: if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183: last = cur;
184: cur -= (k <<= 1) + 1;
185: ++k;
186: }
187: /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188: while (last > cur + 1) {
189: mid = last - ((last - cur) >> 1);
190: if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191: cur = mid;
192: } else {
193: last = mid;
194: }
195: }
196: *m = cur;
197: return(0);
198: }
200: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201: complete array, left is first index of left array, mid is first index of right array, right is last index of right
202: array */
203: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)204: {
205: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206: const PetscInt llen = mid-left;
210: Petsc_memcpy(tarr, arr+(left*size), llen*size);
211: while ((i < llen) && (j <= right)) {
212: if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213: Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214: ++k;
215: gallopright = 0;
216: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217: PetscInt l1, l2, diff1, diff2;
218: do {
219: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220: /* search temp for right[j], can move up to that of temp into arr immediately */
221: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
222: diff1 = l1-i;
223: Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224: k += diff1;
225: i = l1;
226: /* search right for temp[i], can move up to that many of right into arr */
227: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
228: diff2 = l2-j;
229: Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230: k += diff2;
231: j = l2;
232: if (i >= llen || j > right) break;
233: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234: ++MIN_GALLOP_GLOBAL;
235: }
236: } else {
237: Petsc_memmove(arr+(k*size), arr+(j*size), size);
238: ++k;
239: gallopleft = 0;
240: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241: PetscInt l1, l2, diff1, diff2;
242: do {
243: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244: /* search right for temp[i], can move up to that many of right into arr */
245: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
246: diff2 = l2-j;
247: Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248: k += diff2;
249: j = l2;
250: /* search temp for right[j], can copy up to that of temp into arr immediately */
251: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
252: diff1 = l1-i;
253: Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254: k += diff1;
255: i = l1;
256: if (i >= llen || j > right) break;
257: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258: ++MIN_GALLOP_GLOBAL;
259: }
260: }
261: }
262: if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263: return(0);
264: }
266: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)267: {
268: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269: const PetscInt llen = mid-left;
273: Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274: while ((i < llen) && (j <= right)) {
275: if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277: ++k;
278: gallopright = 0;
279: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280: PetscInt l1, l2, diff1, diff2;
281: do {
282: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283: /* search temp for right[j], can move up to that of temp into arr immediately */
284: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
285: diff1 = l1-i;
286: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287: k += diff1;
288: i = l1;
289: /* search right for temp[i], can move up to that many of right into arr */
290: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
291: diff2 = l2-j;
292: Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293: k += diff2;
294: j = l2;
295: if (i >= llen || j > right) break;
296: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297: ++MIN_GALLOP_GLOBAL;
298: }
299: } else {
300: Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301: ++k;
302: gallopleft = 0;
303: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304: PetscInt l1, l2, diff1, diff2;
305: do {
306: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307: /* search right for temp[i], can move up to that many of right into arr */
308: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
309: diff2 = l2-j;
310: Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311: k += diff2;
312: j = l2;
313: /* search temp for right[j], can copy up to that of temp into arr immediately */
314: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
315: diff1 = l1-i;
316: Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317: k += diff1;
318: i = l1;
319: if (i >= llen || j > right) break;
320: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321: ++MIN_GALLOP_GLOBAL;
322: }
323: }
324: }
325: if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326: return(0);
327: }
329: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330: complete array, left is first index of left array, mid is first index of right array, right is last index of right
331: array */
332: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)333: {
334: PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335: const PetscInt rlen = right-mid+1;
339: Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340: while ((i >= 0) && (j >= left)) {
341: if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342: Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343: --k;
344: gallopleft = 0;
345: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346: PetscInt l1, l2, diff1, diff2;
347: do {
348: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349: /* search temp for left[j], can copy up to that many of temp into arr */
350: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
351: diff1 = i-l1;
352: Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353: k -= diff1;
354: i = l1;
355: /* search left for temp[i], can move up to that many of left up arr */
356: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
357: diff2 = j-l2;
358: Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359: k -= diff2;
360: j = l2;
361: if (i < 0 || j < left) break;
362: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363: ++MIN_GALLOP_GLOBAL;
364: }
365: } else {
366: Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367: --k;
368: gallopright = 0;
369: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370: PetscInt l1, l2, diff1, diff2;
371: do {
372: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373: /* search left for temp[i], can move up to that many of left up arr */
374: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
375: diff2 = j-l2;
376: Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377: k -= diff2;
378: j = l2;
379: /* search temp for left[j], can copy up to that many of temp into arr */
380: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
381: diff1 = i-l1;
382: Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383: k -= diff1;
384: i = l1;
385: if (i < 0 || j < left) break;
386: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387: ++MIN_GALLOP_GLOBAL;
388: }
389: }
390: }
391: if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392: return(0);
393: }
395: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)396: {
397: PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398: const PetscInt rlen = right-mid+1;
402: Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403: while ((i >= 0) && (j >= left)) {
404: if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405: Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406: --k;
407: gallopleft = 0;
408: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409: PetscInt l1, l2, diff1, diff2;
410: do {
411: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412: /* search temp for left[j], can copy up to that many of temp into arr */
413: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
414: diff1 = i-l1;
415: Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416: k -= diff1;
417: i = l1;
418: /* search left for temp[i], can move up to that many of left up arr */
419: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
420: diff2 = j-l2;
421: Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422: k -= diff2;
423: j = l2;
424: if (i < 0 || j < left) break;
425: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426: ++MIN_GALLOP_GLOBAL;
427: }
428: } else {
429: Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430: --k;
431: gallopright = 0;
432: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433: PetscInt l1, l2, diff1, diff2;
434: do {
435: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436: /* search left for temp[i], can move up to that many of left up arr */
437: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
438: diff2 = j-l2;
439: Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440: k -= diff2;
441: j = l2;
442: /* search temp for left[j], can copy up to that many of temp into arr */
443: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
444: diff1 = i-l1;
445: Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446: k -= diff1;
447: i = l1;
448: if (i < 0 || j < left) break;
449: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450: ++MIN_GALLOP_GLOBAL;
451: }
452: }
453: }
454: if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455: return(0);
456: }
458: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459: bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)461: {
462: PetscInt i = start == left ? start+1 : start;
465: for (; i <= right; ++i) {
466: PetscInt j = i-1;
467: Petsc_memcpy(tarr, arr+(i*size), size);
468: while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469: COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470: --j;
471: }
472: Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473: }
474: return(0);
475: }
477: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)478: {
479: PetscInt i = start == left ? start+1 : start;
482: for (; i <= right; ++i) {
483: PetscInt j = i-1;
484: Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485: while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486: COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487: --j;
488: }
489: Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490: }
491: return(0);
492: }
494: /* See PetscInsertionSort_Private */
495: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)496: {
497: PetscInt i = start == left ? start+1 : start;
500: for (; i <= right; ++i) {
501: PetscInt l = left, r = i;
502: Petsc_memcpy(tarr, arr+(i*size), size);
503: do {
504: const PetscInt m = l + ((r - l) >> 1);
505: if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506: r = m;
507: } else {
508: l = m + 1;
509: }
510: } while (l < r);
511: Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512: Petsc_memcpy(arr+(l*size), tarr, size);
513: }
514: return(0);
515: }
517: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)518: {
519: PetscInt i = start == left ? start+1 : start;
522: for (; i <= right; ++i) {
523: PetscInt l = left, r = i;
524: Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525: do {
526: const PetscInt m = l + ((r - l) >> 1);
527: if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528: r = m;
529: } else {
530: l = m + 1;
531: }
532: } while (l < r);
533: Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534: Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535: }
536: return(0);
537: }
539: typedef struct {
540: PetscInt size;
541: PetscInt start;
542: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
544: typedef struct {
545: char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546: size_t size;
547: size_t maxsize;
548: } PetscTimSortBuffer;
550: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)551: {
553: if (PetscLikely(newSize <= buff->size)) return(0);
554: {
555: /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
557: size_t newMax = PetscMin(newSize*newSize, buff->maxsize);
558: PetscFree(buff->ptr);
559: PetscMalloc1(newMax, &buff->ptr);
560: buff->size = newMax;
561: }
562: return(0);
563: }
565: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)566: {
568: for (;stacksize; --stacksize) {
569: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570: if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
571: PetscInt l, m = stack[stacksize].start, r;
573: /* Search A for B[0] insertion */
574: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);
575: /* Search B for A[-1] insertion */
576: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);
577: if (m-l <= r-m) {
578: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
579: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
580: } else {
581: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
582: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
583: }
584: }
585: /* Update A with merge */
586: stack[stacksize-1].size += stack[stacksize].size;
587: }
588: return(0);
589: }
591: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)592: {
594: for (;stacksize; --stacksize) {
595: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596: if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
597: PetscInt l, m = stack[stacksize].start, r;
599: /* Search A for B[0] insertion */
600: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);
601: /* Search B for A[-1] insertion */
602: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);
603: if (m-l <= r-m) {
604: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
605: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
606: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
607: } else {
608: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
609: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
610: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
611: }
612: }
613: /* Update A with merge */
614: stack[stacksize-1].size += stack[stacksize].size;
615: }
616: return(0);
617: }
619: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)620: {
621: PetscInt i = *stacksize;
625: while (i) {
626: PetscInt l, m, r, itemp = i;
628: if (i == 1) {
629: /* A = stack[i-1], B = stack[i] */
630: if (stack[i-1].size < stack[i].size) {
631: /* if A[-1] <= B[0] then sorted */
632: if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
633: m = stack[i].start;
634: /* Search A for B[0] insertion */
635: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);
636: /* Search B for A[-1] insertion */
637: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);
638: if (m-l <= r-m) {
639: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
640: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
641: } else {
642: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
643: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
644: }
645: }
646: /* Update A with merge */
647: stack[i-1].size += stack[i].size;
648: --i;
649: }
650: } else {
651: /* i > 2, i.e. C exists
652: A = stack[i-2], B = stack[i-1], C = stack[i]; */
653: if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654: if (stack[i-2].size < stack[i].size) {
655: /* merge B into A, but if A[-1] <= B[0] then already sorted */
656: if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
657: m = stack[i-1].start;
658: /* Search A for B[0] insertion */
659: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);
660: /* Search B for A[-1] insertion */
661: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);
662: if (m-l <= r-m) {
663: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
664: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
665: } else {
666: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
667: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
668: }
669: }
670: /* Update A with merge */
671: stack[i-2].size += stack[i-1].size;
672: /* Push C up the stack */
673: stack[i-1].start = stack[i].start;
674: stack[i-1].size = stack[i].size;
675: } else {
676: /* merge C into B */
677: mergeBC:678: /* If B[-1] <= C[0] then... you know the drill */
679: if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
680: m = stack[i].start;
681: /* Search B for C[0] insertion */
682: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);
683: /* Search C for B[-1] insertion */
684: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);
685: if (m-l <= r-m) {
686: PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
687: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
688: } else {
689: PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
690: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
691: }
692: }
693: /* Update B with merge */
694: stack[i-1].size += stack[i].size;
695: }
696: --i;
697: } else if (stack[i-1].size <= stack[i].size) {
698: /* merge C into B */
699: goto mergeBC;
700: }
701: }
702: if (itemp == i) break;
703: }
704: *stacksize = i;
705: return(0);
706: }
708: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)709: {
710: PetscInt i = *stacksize;
714: while (i) {
715: PetscInt l, m, r, itemp = i;
717: if (i == 1) {
718: /* A = stack[i-1], B = stack[i] */
719: if (stack[i-1].size < stack[i].size) {
720: /* if A[-1] <= B[0] then sorted */
721: if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
722: m = stack[i].start;
723: /* Search A for B[0] insertion */
724: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);
725: /* Search B for A[-1] insertion */
726: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);
727: if (m-l <= r-m) {
728: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
729: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
730: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
731: } else {
732: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
733: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
734: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
735: }
736: }
737: /* Update A with merge */
738: stack[i-1].size += stack[i].size;
739: --i;
740: }
741: } else {
742: /* i > 2, i.e. C exists
743: A = stack[i-2], B = stack[i-1], C = stack[i]; */
744: if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745: if (stack[i-2].size < stack[i].size) {
746: /* merge B into A, but if A[-1] <= B[0] then already sorted */
747: if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
748: m = stack[i-1].start;
749: /* Search A for B[0] insertion */
750: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);
751: /* Search B for A[-1] insertion */
752: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);
753: if (m-l <= r-m) {
754: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
755: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
756: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
757: } else {
758: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
759: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
760: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
761: }
762: }
763: /* Update A with merge */
764: stack[i-2].size += stack[i-1].size;
765: /* Push C up the stack */
766: stack[i-1].start = stack[i].start;
767: stack[i-1].size = stack[i].size;
768: } else {
769: /* merge C into B */
770: mergeBC:771: /* If B[-1] <= C[0] then... you know the drill */
772: if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
773: m = stack[i].start;
774: /* Search B for C[0] insertion */
775: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);
776: /* Search C for B[-1] insertion */
777: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);
778: if (m-l <= r-m) {
779: PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
780: PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
781: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
782: } else {
783: PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
784: PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
785: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
786: }
787: }
788: /* Update B with merge */
789: stack[i-1].size += stack[i].size;
790: }
791: --i;
792: } else if (stack[i-1].size <= stack[i].size) {
793: /* merge C into B */
794: goto mergeBC;
795: }
796: }
797: if (itemp == i) break;
798: }
799: *stacksize = i;
800: return(0);
801: }
803: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804: elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805: binary insertion sort or regulat insertion sort */
806: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)807: {
808: const PetscInt re = PetscMin(runstart+minrun, n-1);
809: PetscInt ri = runstart;
812: if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
813: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
815: ++ri;
816: while (ri < n-1) {
817: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
818: ++ri;
819: }
820: {
821: PetscInt lo = runstart, hi = ri;
822: for (; lo < hi; ++lo, --hi) {
823: COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824: }
825: }
826: } else {
827: ++ri;
828: while (ri < n-1) {
829: if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
830: ++ri;
831: }
832: }
833: #if defined(PETSC_USE_DEBUG)
834: {
836: PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
837: }
838: #endif
839: if (ri < re) {
840: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841: binary search */
842: if (ri-runstart <= minrun >> 1) {
843: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844: PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845: } else {
846: PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
847: }
848: *runend = re;
849: } else *runend = ri;
850: return(0);
851: }
853: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)854: {
855: const PetscInt re = PetscMin(runstart+minrun, n-1);
856: PetscInt ri = runstart;
859: if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
860: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861: if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
862: ++ri;
863: while (ri < n-1) {
864: if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
865: ++ri;
866: }
867: {
868: PetscInt lo = runstart, hi = ri;
869: for (; lo < hi; ++lo, --hi) {
870: COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871: }
872: }
873: } else {
874: ++ri;
875: while (ri < n-1) {
876: if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
877: ++ri;
878: }
879: }
880: #if defined(PETSC_USE_DEBUG)
881: {
883: PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
884: }
885: #endif
886: if (ri < re) {
887: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888: binary search */
889: if (ri-runstart <= minrun >> 1) {
890: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891: PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
892: } else {
893: PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
894: }
895: *runend = re;
896: } else *runend = ri;
897: return(0);
898: }
900: /*@C
901: PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
903: Not Collective
905: Input Parameters:
906: + n - number of values
907: . arr - array to be sorted
908: . size - size in bytes of the datatype held in arr
909: . cmp - function pointer to comparison function
910: - ctx - optional context to be passed to comparison function, NULL if not needed
912: Output Parameters:
913: . arr - sorted array
915: Notes:
916: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
917: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
918: select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
919: do so it repeatedly triggers attempts throughout to merge adjacent runs.
921: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
922: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
923: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
924: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
925: likely all contain similar data.
927: Sample usage:
928: The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
929: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
930: may also
931: change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
932: the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
933: order
935: .vb
936: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
937: my_type l = *(my_type *) left, r = *(my_type *) right;
938: return (l < r) ? -1 : (l > r);
939: }
940: .ve
941: Note the context is unused here but you may use it to pass and subsequently access whatever information required
942: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
943: Then pass the function
944: .vb
945: PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
946: .ve
948: Fortran Notes:
949: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
950: returns result. For example
951: .vb
952: subroutine CompareIntegers(left,right,ctx,result)
953: implicit none
955: PetscInt,intent(in) :: left, right
956: type(UserCtx) :: ctx
957: integer,intent(out) :: result
959: if (left < right) then
960: result = -1
961: else if (left == right) then
962: result = 0
963: else
964: result = 1
965: end if
966: return
967: end subroutine CompareIntegers
968: .ve
970: References:
971: 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
973: Level: developer
975: .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
976: @*/
977: PetscErrorCodePetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)978: {
979: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
980: PetscTimSortStack runstack[128];
981: PetscTimSortBuffer buff;
982: PetscErrorCode ierr;
983: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
984: It is so unlikely that this limit is reached that this is __never__ checked for */
987: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
988: is a power of 2 or one plus a power of 2 */
989: {
990: PetscInt t = n, r = 0;
991: /* r becomes 1 if the least significant bits contain at least one off bit */
992: while (t >= 64) {
993: r |= t & 1;
994: t >>= 1;
995: }
996: minrun = t + r;
997: }
998: if (PetscDefined(USE_DEBUG)) {
999: PetscInfo1(NULL, "minrun = %D\n", minrun);
1000: if (n < 64) {
1001: PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);
1002: } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1003: }
1004: PetscMalloc1((size_t) minrun*size, &buff.ptr);
1005: buff.size = (size_t) minrun*size;
1006: buff.maxsize = (size_t) n*size;
1007: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1008: while (runstart < n) {
1009: /* Check if additional entries are at least partially ordered and build natural run */
1010: PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
1011: runstack[stacksize].start = runstart;
1012: runstack[stacksize].size = runend-runstart+1;
1013: PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
1014: ++stacksize;
1015: runstart = runend+1;
1016: }
1017: /* Have been inside while, so discard last stacksize++ */
1018: --stacksize;
1019: PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
1020: PetscFree(buff.ptr);
1021: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1022: return(0);
1023: }
1025: /*@C
1026: PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1027: reorders a second array to match the first. The arrays need not be the same type.
1029: Not Collective
1031: Input Parameters:
1032: + n - number of values
1033: . arr - array to be sorted
1034: . asize - size in bytes of the datatype held in arr
1035: . barr - array to be reordered
1036: . asize - size in bytes of the datatype held in barr
1037: . cmp - function pointer to comparison function
1038: - ctx - optional context to be passed to comparison function, NULL if not needed
1040: Output Parameters:
1041: + arr - sorted array
1042: - barr - reordered array
1044: Notes:
1045: The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1046: overlap.
1048: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1049: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1050: to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1051: arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1053: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1054: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1055: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1056: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1057: likely all contain similar data.
1059: Sample usage:
1060: The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1061: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1062: may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1063: guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1064: increasing order
1066: .vb
1067: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1068: my_type l = *(my_type *) left, r = *(my_type *) right;
1069: return (l < r) ? -1 : (l > r);
1070: }
1071: .ve
1072: Note the context is unused here but you may use it to pass and subsequently access whatever information required
1073: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1074: Then pass the function
1075: .vb
1076: PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1077: .ve
1080: Fortran Notes:
1081: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1082: returns result. For example
1083: .vb
1084: subroutine CompareIntegers(left,right,ctx,result)
1085: implicit none
1087: PetscInt,intent(in) :: left, right
1088: type(UserCtx) :: ctx
1089: integer,intent(out) :: result
1091: if (left < right) then
1092: result = -1
1093: else if (left == right) then
1094: result = 0
1095: else
1096: result = 1
1097: end if
1098: return
1099: end subroutine CompareIntegers
1100: .ve
1102: References:
1103: 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1105: Level: developer
1107: .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1108: @*/
1109: PetscErrorCodePetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)1110: {
1111: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
1112: PetscTimSortStack runstack[128];
1113: PetscTimSortBuffer abuff, bbuff;
1114: PetscErrorCode ierr;
1115: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1116: It is so unlikely that this limit is reached that this is __never__ checked for */
1119: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1120: is a power of 2 or one plus a power of 2 */
1121: {
1122: PetscInt t = n, r = 0;
1123: /* r becomes 1 if the least significant bits contain at least one off bit */
1124: while (t >= 64) {
1125: r |= t & 1;
1126: t >>= 1;
1127: }
1128: minrun = t + r;
1129: }
1130: if (PetscDefined(USE_DEBUG)) {
1131: PetscInfo1(NULL, "minrun = %D\n", minrun);
1132: if (n < 64) {
1133: PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);
1134: } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1135: }
1136: PetscMalloc1((size_t) minrun*asize, &abuff.ptr);
1137: abuff.size = (size_t) minrun*asize;
1138: abuff.maxsize = (size_t) n*asize;
1139: PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);
1140: bbuff.size = (size_t) minrun*bsize;
1141: bbuff.maxsize = (size_t) n*bsize;
1142: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1143: while (runstart < n) {
1144: /* Check if additional entries are at least partially ordered and build natural run */
1145: PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1146: runstack[stacksize].start = runstart;
1147: runstack[stacksize].size = runend-runstart+1;
1148: PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1149: ++stacksize;
1150: runstart = runend+1;
1151: }
1152: /* Have been inside while, so discard last stacksize++ */
1153: --stacksize;
1154: PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1155: PetscFree(abuff.ptr);
1156: PetscFree(bbuff.ptr);
1157: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1158: return(0);
1159: }
1161: /*@
1162: PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1164: Not Collective
1166: Input Parameters:
1167: + n - number of values
1168: - arr - array of integers
1170: Output Parameters:
1171: . arr - sorted array of integers
1173: Notes:
1174: If the array is less than 64 entries long PetscSortInt() is automatically used.
1176: This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1177: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1178: recomended that the user benchmark their code to see which routine is fastest.
1180: Level: intermediate
1182: .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1183: @*/
1184: PetscErrorCodePetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])1185: {
1188: if (n <= 1) return(0);
1190: if (n < 64) {
1191: PetscSortInt(n, arr);
1192: } else {
1193: PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1194: }
1195: return(0);
1196: }
1198: /*@
1199: PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1200: array to match the first.
1202: Not Collective
1204: Input Parameters:
1205: + n - number of values
1206: . arr1 - array of integers to be sorted
1207: - arr2 - array of integers to be reordered
1209: Output Parameters:
1210: + arr1 - sorted array of integers
1211: - arr2 - reordered array of integers
1213: Notes:
1214: The arrays CANNOT overlap.
1216: If the array to be sorted is less than 64 entries long PetscSortIntWithArray() is automatically used.
1218: This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1219: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1220: recomended that the user benchmark their code to see which routine is fastest.
1222: Level: intermediate
1224: .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1225: @*/
1226: PetscErrorCodePetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])1227: {
1232: if (n == 1) return(0);
1233: if (n < 64) {
1234: PetscSortIntWithArray(n, arr1, arr2);
1235: } else {
1236: PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1237: }
1238: return(0);
1239: }
1241: /*@
1242: PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1244: Not Collective
1246: Input Parameters:
1247: + n - number of values
1248: - arr - array of PetscMPIInts
1250: Output Parameters:
1251: . arr - sorted array of integers
1253: Notes:
1254: If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1256: This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1257: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1258: recomended that the user benchmark their code to see which routine is fastest.
1260: Level: intermediate
1262: .seealso: PetscTimSort(), PetscSortMPIInt()
1263: @*/
1264: PetscErrorCodePetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])1265: {
1266: PetscErrorCode ierr;
1270: if (n == 1) return(0);
1271: if (n < 64) {
1272: PetscSortMPIInt(n, arr);
1273: } else {
1274: PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1275: }
1276: return(0);
1277: }
1279: /*@
1280: PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1281: array to match the first.
1283: Not Collective
1285: Input Parameters:
1286: + n - number of values
1287: . arr1 - array of integers to be sorted
1288: - arr2 - array of integers to be reordered
1290: Output Parameters:
1291: + arr1 - sorted array of integers
1292: - arr2 - reordered array of integers
1294: Notes:
1295: The arrays CANNOT overlap.
1297: If the array to be sorted is less than 64 entries long PetscSortMPIIntWithArray() is automatically used.
1299: This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1300: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1301: recomended that the user benchmark their code to see which routine is fastest.
1303: Level: intermediate
1305: .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1306: @*/
1307: PetscErrorCodePetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])1308: {
1311: if (n <= 1) return(0);
1314: if (n < 64) {
1315: PetscSortMPIIntWithArray(n, arr1, arr2);
1316: } else {
1317: PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1318: }
1319: return(0);
1320: }
1322: /*@
1323: PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1325: Not Collective
1327: Input Parameters:
1328: + n - number of values
1329: - arr - array of PetscReals
1331: Output Parameters:
1332: . arr - sorted array of integers
1334: Notes:
1335: If the array is less than 64 entries long PetscSortReal() is automatically used.
1337: This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1338: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1339: recomended that the user benchmark their code to see which routine is fastest.
1341: Level: intermediate
1343: .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1344: @*/
1345: PetscErrorCodePetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])1346: {
1347: PetscErrorCode ierr;
1350: if (n <= 1) return(0);
1352: if (n < 64) {
1353: PetscSortReal(n, arr);
1354: } else {
1355: PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1356: }
1357: return(0);
1358: }
1360: /*@
1361: PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1362: array of PetscInts to match the first.
1364: Not Collective
1366: Input Parameters:
1367: + n - number of values
1368: . arr1 - array of PetscReals to be sorted
1369: - arr2 - array of PetscReals to be reordered
1371: Output Parameters:
1372: + arr1 - sorted array of PetscReals
1373: - arr2 - reordered array of PetscInts
1375: Notes:
1376: If the array to be sorted is less than 64 entries long PetscSortRealWithArrayInt() is automatically used.
1378: This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1379: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1380: recomended that the user benchmark their code to see which routine is fastest.
1382: Level: intermediate
1384: .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1385: @*/
1386: PetscErrorCodePetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])1387: {
1390: if (n <= 1) return(0);
1393: if (n < 64) {
1394: PetscSortRealWithArrayInt(n, arr1, arr2);
1395: } else {
1396: PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1397: }
1398: return(0);
1399: }