Actual source code: sortso.c

  1: #include <petscsys.h>
  2: #include <petsc/private/petscimpl.h>

  4: static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
  5: {
  6:   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
  7:   return (l < r) ? -1 : (l > r);
  8: }

 10: static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
 11: {
 12:   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
 13:   return (l < r) ? -1 : (l > r);
 14: }

 16: static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
 17: {
 18:   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
 19:   return (l < r) ? -1 : (l > r);
 20: }

 22: #define MIN_GALLOP_CONST_GLOBAL 8
 23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
 24: typedef int (*CompFunc)(const void *, const void *, void *);

 26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
 27: #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
 28: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
 29: {
 30:   __builtin_memcpy(t, b, size);
 31:   __builtin_memmove(b, a, size);
 32:   __builtin_memcpy(a, t, size);
 33:   return;
 34: }

 36: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
 37: {
 38:   __builtin_memcpy(t, ar, asize);
 39:   __builtin_memmove(ar, al, asize);
 40:   __builtin_memcpy(al, t, asize);
 41:   __builtin_memcpy(t, br, bsize);
 42:   __builtin_memmove(br, bl, bsize);
 43:   __builtin_memcpy(bl, t, bsize);
 44:   return;
 45: }

 47: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
 48: {
 49:   __builtin_memcpy(dest, src, size);
 50:   return;
 51: }

 53: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 54: {
 55:   __builtin_memcpy(adest, asrc, asize);
 56:   __builtin_memcpy(bdest, bsrc, bsize);
 57:   return;
 58: }

 60: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
 61: {
 62:   __builtin_memmove(dest, src, size);
 63:   return;
 64: }

 66: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 67: {
 68:   __builtin_memmove(adest, asrc, asize);
 69:   __builtin_memmove(bdest, bsrc, bsize);
 70:   return;
 71: }
 72: # else
 73: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
 74: {
 75:   PETSC_COMM_SELF,PetscMemcpy(t, b, size);
 76:   PETSC_COMM_SELF,PetscMemmove(b, a, size);
 77:   PETSC_COMM_SELF,PetscMemcpy(a, t, size);
 78:   return;
 79: }

 81: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
 82: {
 83:   PETSC_COMM_SELF,PetscMemcpy(t, ar, asize);
 84:   PETSC_COMM_SELF,PetscMemmove(ar, al, asize);
 85:   PETSC_COMM_SELF,PetscMemcpy(al, t, asize);
 86:   PETSC_COMM_SELF,PetscMemcpy(t, br, bsize);
 87:   PETSC_COMM_SELF,PetscMemmove(br, bl, bsize);
 88:   PETSC_COMM_SELF,PetscMemcpy(bl, t, bsize);
 89:   return;
 90: }

 92: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
 93: {
 94:   PETSC_COMM_SELF,PetscMemcpy(dest, src, size);
 95:   return;
 96: }

 98: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 99: {
100:   PETSC_COMM_SELF,PetscMemcpy(adest, asrc, asize);
101:   PETSC_COMM_SELF,PetscMemcpy(bdest, bsrc, bsize);
102:   return;
103: }

105: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
106: {
107:   PETSC_COMM_SELF,PetscMemmove(dest, src, size);
108:   return;
109: }

111: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
112: {
113:   PETSC_COMM_SELF,PetscMemmove(adest, asrc, asize);
114:   PETSC_COMM_SELF,PetscMemmove(bdest, bsrc, bsize);
115:   return;
116: }
117: #endif

119: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
120:  x. Output also inclusive.

122:  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:

124:    {0,1,2,3,4,5}

126:    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
127:   */
128: static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
129: {
130:   PetscInt last = l, k = 1, mid, cur = l+1;

132:   *m = l;
133:   PetscAssert(r >= l,PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft",r,l);
134:   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; return 0;}
135:   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) return 0;
136:   while (PETSC_TRUE) {
137:     if (PetscUnlikely(cur > r)) {cur = r; break;}
138:     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
139:     last = cur;
140:     cur += (k <<= 1) + 1;
141:     ++k;
142:   }
143:   /* standard binary search but take last 0 mid 0 cur 1 into account*/
144:   while (cur > last + 1) {
145:     mid = last + ((cur - last) >> 1);
146:     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
147:       cur = mid;
148:     } else {
149:       last = mid;
150:     }
151:   }
152:   *m = cur;
153:   return 0;
154: }

156: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
157:  < x. Output also inclusive */
158: static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
159: {
160:   PetscInt last = r, k = 1, mid, cur = r-1;

162:   *m = r;
163:   PetscAssert(r >= l,PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight",r,l);
164:   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; return 0;}
165:   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) return 0;
166:   while (PETSC_TRUE) {
167:     if (PetscUnlikely(cur < l)) {cur = l; break;}
168:     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
169:     last = cur;
170:     cur -= (k <<= 1) + 1;
171:     ++k;
172:   }
173:   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
174:   while (last > cur + 1) {
175:     mid = last - ((last - cur) >> 1);
176:     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
177:       cur = mid;
178:     } else {
179:       last = mid;
180:     }
181:   }
182:   *m = cur;
183:   return 0;
184: }

186: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
187:  complete array, left is first index of left array, mid is first index of right array, right is last index of right
188:  array */
189: static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
190: {
191:   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
192:   const PetscInt llen = mid-left;

194:   Petsc_memcpy(tarr, arr+(left*size), llen*size);
195:   while ((i < llen) && (j <= right)) {
196:     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
197:       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
198:       ++k;
199:       gallopright = 0;
200:       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
201:         PetscInt l1, l2, diff1, diff2;
202:         do {
203:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
204:           /* search temp for right[j], can move up to that of temp into arr immediately */
205:           PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
206:           diff1 = l1-i;
207:           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
208:           k += diff1;
209:           i = l1;
210:           /* search right for temp[i], can move up to that many of right into arr */
211:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
212:           diff2 = l2-j;
213:           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
214:           k += diff2;
215:           j = l2;
216:           if (i >= llen || j > right) break;
217:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
218:         ++MIN_GALLOP_GLOBAL;
219:       }
220:     } else {
221:       Petsc_memmove(arr+(k*size), arr+(j*size), size);
222:       ++k;
223:       gallopleft = 0;
224:       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
225:         PetscInt l1, l2, diff1, diff2;
226:         do {
227:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
228:           /* search right for temp[i], can move up to that many of right into arr */
229:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
230:           diff2 = l2-j;
231:           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
232:           k += diff2;
233:           j = l2;
234:           /* search temp for right[j], can copy up to that of temp into arr immediately */
235:           PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
236:           diff1 = l1-i;
237:           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
238:           k += diff1;
239:           i = l1;
240:           if (i >= llen || j > right) break;
241:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
242:         ++MIN_GALLOP_GLOBAL;
243:       }
244:     }
245:   }
246:   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
247:   return 0;
248: }

250: static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
251: {
252:   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
253:   const PetscInt llen = mid-left;

255:   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
256:   while ((i < llen) && (j <= right)) {
257:     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
258:       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
259:       ++k;
260:       gallopright = 0;
261:       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
262:         PetscInt l1, l2, diff1, diff2;
263:         do {
264:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
265:           /* search temp for right[j], can move up to that of temp into arr immediately */
266:           PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
267:           diff1 = l1-i;
268:           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
269:           k += diff1;
270:           i = l1;
271:           /* search right for temp[i], can move up to that many of right into arr */
272:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
273:           diff2 = l2-j;
274:           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
275:           k += diff2;
276:           j = l2;
277:           if (i >= llen || j > right) break;
278:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
279:         ++MIN_GALLOP_GLOBAL;
280:       }
281:     } else {
282:       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
283:       ++k;
284:       gallopleft = 0;
285:       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
286:         PetscInt l1, l2, diff1, diff2;
287:         do {
288:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
289:           /* search right for temp[i], can move up to that many of right into arr */
290:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
291:           diff2 = l2-j;
292:           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293:           k += diff2;
294:           j = l2;
295:           /* search temp for right[j], can copy up to that of temp into arr immediately */
296:           PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
297:           diff1 = l1-i;
298:           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
299:           k += diff1;
300:           i = l1;
301:           if (i >= llen || j > right) break;
302:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
303:         ++MIN_GALLOP_GLOBAL;
304:       }
305:     }
306:   }
307:   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
308:   return 0;
309: }

311: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
312:  complete array, left is first index of left array, mid is first index of right array, right is last index of right
313:  array */
314: static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
315: {
316:   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
317:   const PetscInt rlen = right-mid+1;

319:   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
320:   while ((i >= 0) && (j >= left)) {
321:     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
322:       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
323:       --k;
324:       gallopleft = 0;
325:       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
326:         PetscInt l1, l2, diff1, diff2;
327:         do {
328:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
329:           /* search temp for left[j], can copy up to that many of temp into arr */
330:           PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
331:           diff1 = i-l1;
332:           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
333:           k -= diff1;
334:           i = l1;
335:           /* search left for temp[i], can move up to that many of left up arr */
336:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
337:           diff2 = j-l2;
338:           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
339:           k -= diff2;
340:           j = l2;
341:           if (i < 0 || j < left) break;
342:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
343:         ++MIN_GALLOP_GLOBAL;
344:       }
345:     } else {
346:       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
347:       --k;
348:       gallopright = 0;
349:       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
350:         PetscInt l1, l2, diff1, diff2;
351:         do {
352:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
353:           /* search left for temp[i], can move up to that many of left up arr */
354:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
355:           diff2 = j-l2;
356:           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
357:           k -= diff2;
358:           j = l2;
359:           /* search temp for left[j], can copy up to that many of temp into arr */
360:           PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
361:           diff1 = i-l1;
362:           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
363:           k -= diff1;
364:           i = l1;
365:           if (i < 0 || j < left) break;
366:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
367:         ++MIN_GALLOP_GLOBAL;
368:       }
369:     }
370:   }
371:   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
372:   return 0;
373: }

375: static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
376: {
377:   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
378:   const PetscInt rlen = right-mid+1;

380:   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
381:   while ((i >= 0) && (j >= left)) {
382:     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
383:       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
384:       --k;
385:       gallopleft = 0;
386:       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
387:         PetscInt l1, l2, diff1, diff2;
388:         do {
389:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
390:           /* search temp for left[j], can copy up to that many of temp into arr */
391:           PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
392:           diff1 = i-l1;
393:           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
394:           k -= diff1;
395:           i = l1;
396:           /* search left for temp[i], can move up to that many of left up arr */
397:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
398:           diff2 = j-l2;
399:           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
400:           k -= diff2;
401:           j = l2;
402:           if (i < 0 || j < left) break;
403:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
404:         ++MIN_GALLOP_GLOBAL;
405:       }
406:     } else {
407:       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
408:       --k;
409:       gallopright = 0;
410:       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
411:         PetscInt l1, l2, diff1, diff2;
412:         do {
413:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
414:           /* search left for temp[i], can move up to that many of left up arr */
415:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
416:           diff2 = j-l2;
417:           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
418:           k -= diff2;
419:           j = l2;
420:           /* search temp for left[j], can copy up to that many of temp into arr */
421:           PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
422:           diff1 = i-l1;
423:           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
424:           k -= diff1;
425:           i = l1;
426:           if (i < 0 || j < left) break;
427:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
428:         ++MIN_GALLOP_GLOBAL;
429:       }
430:     }
431:   }
432:   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
433:   return 0;
434: }

436: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
437:  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
438: static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
439: {
440:   PetscInt i = start == left ? start+1 : start;

442:   for (; i <= right; ++i) {
443:     PetscInt j = i-1;
444:     Petsc_memcpy(tarr, arr+(i*size), size);
445:     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
446:       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
447:       --j;
448:     }
449:     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
450:   }
451:   return 0;
452: }

454: static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
455: {
456:   PetscInt i = start == left ? start+1 : start;

458:   for (; i <= right; ++i) {
459:     PetscInt j = i-1;
460:     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
461:     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
462:       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
463:       --j;
464:     }
465:     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
466:   }
467:   return 0;
468: }

470: /* See PetscInsertionSort_Private */
471: static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
472: {
473:   PetscInt i = start == left ? start+1 : start;

475:   for (; i <= right; ++i) {
476:     PetscInt l = left, r = i;
477:     Petsc_memcpy(tarr, arr+(i*size), size);
478:     do {
479:       const PetscInt m = l + ((r - l) >> 1);
480:       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
481:         r = m;
482:       } else {
483:         l = m + 1;
484:       }
485:     } while (l < r);
486:     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
487:     Petsc_memcpy(arr+(l*size), tarr, size);
488:   }
489:   return 0;
490: }

492: static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
493: {
494:   PetscInt i = start == left ? start+1 : start;

496:   for (; i <= right; ++i) {
497:     PetscInt l = left, r = i;
498:     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
499:     do {
500:       const PetscInt m = l + ((r - l) >> 1);
501:       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
502:         r = m;
503:       } else {
504:         l = m + 1;
505:       }
506:     } while (l < r);
507:     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
508:     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
509:   }
510:   return 0;
511: }

513: typedef struct {
514:   PetscInt size;
515:   PetscInt start;
516: #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
517: } PetscTimSortStack;
518: #else
519: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
520: #endif

522: typedef struct {
523:   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
524:   size_t size;
525:   size_t maxsize;
526: } PetscTimSortBuffer;

528: static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
529: {
530:   if (PetscLikely(newSize <= buff->size)) return 0;
531:   {
532:     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
533:     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
534:     PetscFree(buff->ptr);
535:     PetscMalloc1(newMax, &buff->ptr);
536:     buff->size = newMax;
537:   }
538:   return 0;
539: }

541: static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
542: {
543:   for (;stacksize; --stacksize) {
544:     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
545:     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
546:       PetscInt       l, m = stack[stacksize].start, r;
547:       /* Search A for B[0] insertion */
548:       PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);
549:       /* Search B for A[-1] insertion */
550:       PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);
551:       if (m-l <= r-m) {
552:         PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
553:         PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
554:       } else {
555:         PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
556:         PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
557:       }
558:     }
559:     /* Update A with merge */
560:     stack[stacksize-1].size += stack[stacksize].size;
561:   }
562:   return 0;
563: }

565: static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
566: {
567:   for (;stacksize; --stacksize) {
568:     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
569:     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
570:       PetscInt       l, m = stack[stacksize].start, r;
571:       /* Search A for B[0] insertion */
572:       PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);
573:       /* Search B for A[-1] insertion */
574:       PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);
575:       if (m-l <= r-m) {
576:         PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
577:         PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
578:         PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
579:       } else {
580:         PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
581:         PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
582:         PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
583:       }
584:     }
585:     /* Update A with merge */
586:     stack[stacksize-1].size += stack[stacksize].size;
587:   }
588:   return 0;
589: }

591: static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
592: {
593:   PetscInt       i = *stacksize;

595:   while (i) {
596:     PetscInt l, m, r, itemp = i;

598:     if (i == 1) {
599:       /* A = stack[i-1], B = stack[i] */
600:       if (stack[i-1].size < stack[i].size) {
601:         /* if A[-1] <= B[0] then sorted */
602:         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
603:           m = stack[i].start;
604:           /* Search A for B[0] insertion */
605:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);
606:           /* Search B for A[-1] insertion */
607:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);
608:           if (m-l <= r-m) {
609:             PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
610:             PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
611:           } else {
612:             PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
613:             PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
614:           }
615:         }
616:         /* Update A with merge */
617:         stack[i-1].size += stack[i].size;
618:         --i;
619:       }
620:     } else {
621:       /* i > 2, i.e. C exists
622:        A = stack[i-2], B = stack[i-1], C = stack[i]; */
623:       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
624:         if (stack[i-2].size < stack[i].size) {
625:           /* merge B into A, but if A[-1] <= B[0] then already sorted */
626:           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
627:             m = stack[i-1].start;
628:             /* Search A for B[0] insertion */
629:             PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);
630:             /* Search B for A[-1] insertion */
631:             PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);
632:             if (m-l <= r-m) {
633:               PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
634:               PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
635:             } else {
636:               PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
637:               PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
638:             }
639:           }
640:           /* Update A with merge */
641:           stack[i-2].size += stack[i-1].size;
642:           /* Push C up the stack */
643:           stack[i-1].start = stack[i].start;
644:           stack[i-1].size = stack[i].size;
645:         } else {
646:           /* merge C into B */
647:           mergeBC:
648:           /* If B[-1] <= C[0] then... you know the drill */
649:           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
650:             m = stack[i].start;
651:             /* Search B for C[0] insertion */
652:             PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);
653:             /* Search C for B[-1] insertion */
654:             PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);
655:             if (m-l <= r-m) {
656:               PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
657:               PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
658:             } else {
659:               PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
660:               PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
661:             }
662:           }
663:           /* Update B with merge */
664:           stack[i-1].size += stack[i].size;
665:         }
666:         --i;
667:       } else if (stack[i-1].size <= stack[i].size) {
668:         /* merge C into B */
669:         goto mergeBC;
670:       }
671:     }
672:     if (itemp == i) break;
673:   }
674:   *stacksize = i;
675:   return 0;
676: }

678: static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
679: {
680:   PetscInt       i = *stacksize;

682:   while (i) {
683:     PetscInt l, m, r, itemp = i;

685:     if (i == 1) {
686:       /* A = stack[i-1], B = stack[i] */
687:       if (stack[i-1].size < stack[i].size) {
688:         /* if A[-1] <= B[0] then sorted */
689:         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
690:           m = stack[i].start;
691:           /* Search A for B[0] insertion */
692:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);
693:           /* Search B for A[-1] insertion */
694:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);
695:           if (m-l <= r-m) {
696:             PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
697:             PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
698:             PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
699:           } else {
700:             PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
701:             PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
702:             PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
703:           }
704:         }
705:         /* Update A with merge */
706:         stack[i-1].size += stack[i].size;
707:         --i;
708:       }
709:     } else {
710:       /* i > 2, i.e. C exists
711:        A = stack[i-2], B = stack[i-1], C = stack[i]; */
712:       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
713:         if (stack[i-2].size < stack[i].size) {
714:           /* merge B into A, but if A[-1] <= B[0] then already sorted */
715:           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
716:             m = stack[i-1].start;
717:             /* Search A for B[0] insertion */
718:             PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);
719:             /* Search B for A[-1] insertion */
720:             PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);
721:             if (m-l <= r-m) {
722:               PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
723:               PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
724:               PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
725:             } else {
726:               PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
727:               PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
728:               PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
729:             }
730:           }
731:           /* Update A with merge */
732:           stack[i-2].size += stack[i-1].size;
733:           /* Push C up the stack */
734:           stack[i-1].start = stack[i].start;
735:           stack[i-1].size = stack[i].size;
736:         } else {
737:           /* merge C into B */
738:           mergeBC:
739:           /* If B[-1] <= C[0] then... you know the drill */
740:           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
741:             m = stack[i].start;
742:             /* Search B for C[0] insertion */
743:             PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);
744:             /* Search C for B[-1] insertion */
745:             PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);
746:             if (m-l <= r-m) {
747:               PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
748:               PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
749:               PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
750:             } else {
751:               PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
752:               PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
753:               PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
754:             }
755:           }
756:           /* Update B with merge */
757:           stack[i-1].size += stack[i].size;
758:         }
759:         --i;
760:       } else if (stack[i-1].size <= stack[i].size) {
761:         /* merge C into B */
762:         goto mergeBC;
763:       }
764:     }
765:     if (itemp == i) break;
766:   }
767:   *stacksize = i;
768:   return 0;
769: }

771: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
772:  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
773:  binary insertion sort or regulat insertion sort */
774: static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
775: {
776:   const PetscInt re = PetscMin(runstart+minrun, n-1);
777:   PetscInt       ri = runstart;

779:   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return 0;}
780:   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
781:   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
782:     ++ri;
783:     while (ri < n-1) {
784:       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
785:       ++ri;
786:     }
787:     {
788:       PetscInt lo = runstart, hi = ri;
789:       for (; lo < hi; ++lo, --hi) {
790:         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
791:       }
792:     }
793:   } else {
794:     ++ri;
795:     while (ri < n-1) {
796:       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
797:       ++ri;
798:     }
799:   }
800:   if (ri < re) {
801:     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
802:      binary search */
803:     if (ri-runstart <= minrun >> 1) {
804:       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
805:       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
806:     } else {
807:       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
808:     }
809:     *runend = re;
810:   } else *runend = ri;
811:   return 0;
812: }

814: static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
815: {
816:   const PetscInt re = PetscMin(runstart+minrun, n-1);
817:   PetscInt       ri = runstart;

819:   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return 0;}
820:   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
821:   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
822:     ++ri;
823:     while (ri < n-1) {
824:       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
825:       ++ri;
826:     }
827:     {
828:       PetscInt lo = runstart, hi = ri;
829:       for (; lo < hi; ++lo, --hi) {
830:         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
831:       }
832:     }
833:   } else {
834:     ++ri;
835:     while (ri < n-1) {
836:       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
837:       ++ri;
838:     }
839:   }
840:   if (ri < re) {
841:     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
842:      binary search */
843:     if (ri-runstart <= minrun >> 1) {
844:       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
845:       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
846:     } else {
847:       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
848:     }
849:     *runend = re;
850:   } else *runend = ri;
851:   return 0;
852: }

854: /*@C
855:   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.

857:   Not Collective

859:   Input Parameters:
860: + n    - number of values
861: . arr  - array to be sorted
862: . size - size in bytes of the datatype held in arr
863: . cmp  - function pointer to comparison function
864: - ctx  - optional context to be passed to comparison function, NULL if not needed

866:   Output Parameters:
867: . arr  - sorted array

869:   Notes:
870:   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
871:  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
872:  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
873:  do so it repeatedly triggers attempts throughout to merge adjacent runs.

875:   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
876:   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
877:   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
878:   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
879:   likely all contain similar data.

881:   Sample usage:
882:   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
883:   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
884:   may also
885:  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
886:  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
887:   order

889: .vb
890:   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
891:     my_type l = *(my_type *) left, r = *(my_type *) right;
892:     return (l < r) ? -1 : (l > r);
893:   }
894: .ve
895:   Note the context is unused here but you may use it to pass and subsequently access whatever information required
896:   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
897:   Then pass the function
898: .vb
899:   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
900: .ve

902:   Fortran Notes:
903:   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
904:   returns result. For example
905: .vb
906:  subroutine CompareIntegers(left,right,ctx,result)
907:    implicit none

909:    PetscInt,intent(in) :: left, right
910:    type(UserCtx)       :: ctx
911:    integer,intent(out) :: result

913:    if (left < right) then
914:      result = -1
915:    else if (left == right) then
916:      result = 0
917:    else
918:      result = 1
919:    end if
920:    return
921:  end subroutine CompareIntegers
922: .ve

924:   References:
925: . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt

927:   Level: developer

929: .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
930: @*/
931: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
932: {
933:   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
934:   PetscTimSortStack  runstack[128];
935:   PetscTimSortBuffer buff;
936:   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
937:    It is so unlikely that this limit is reached that this is __never__ checked for */

939:   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
940:    is a power of 2 or one plus a power of 2 */
941:   {
942:     PetscInt t = n, r = 0;
943:     /* r becomes 1 if the least significant bits contain at least one off bit */
944:     while (t >= 64) {
945:       r |= t & 1;
946:       t >>= 1;
947:     }
948:     minrun = t + r;
949:   }
950:   if (PetscDefined(USE_DEBUG)) {
951:     PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);
952:     if (n < 64) {
953:       PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n);
955:   }
956:   PetscMalloc1((size_t) minrun*size, &buff.ptr);
957:   buff.size = (size_t) minrun*size;
958:   buff.maxsize = (size_t) n*size;
959:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
960:   while (runstart < n) {
961:     /* Check if additional entries are at least partially ordered and build natural run */
962:     PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
963:     runstack[stacksize].start = runstart;
964:     runstack[stacksize].size = runend-runstart+1;
965:     PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
966:     ++stacksize;
967:     runstart = runend+1;
968:   }
969:   /* Have been inside while, so discard last stacksize++ */
970:   --stacksize;
971:   PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
972:   PetscFree(buff.ptr);
973:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
974:   return 0;
975: }

977: /*@C
978:   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
979:   reorders a second array to match the first. The arrays need not be the same type.

981:   Not Collective

983:   Input Parameters:
984: + n     - number of values
985: . asize - size in bytes of the datatype held in arr
986: . bsize - size in bytes of the datatype held in barr
987: . cmp   - function pointer to comparison function
988: - ctx   - optional context to be passed to comparison function, NULL if not needed

990:   Input/Output Parameters:
991: + arr  - array to be sorted, on output it is sorted
992: - barr - array to be reordered, on output it is reordered

994:   Notes:
995:   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
996:   overlap.

998:   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
999:   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1000:  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1001:  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.

1003:   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1004:   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1005:   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1006:   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1007:   likely all contain similar data.

1009:   Sample usage:
1010:   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1011:   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1012:   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1013:   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1014:   increasing order

1016: .vb
1017:   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1018:     my_type l = *(my_type *) left, r = *(my_type *) right;
1019:     return (l < r) ? -1 : (l > r);
1020:   }
1021: .ve
1022:   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1023:   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1024:   Then pass the function
1025: .vb
1026:   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1027: .ve

1029:   Fortran Notes:
1030:   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1031:   returns result. For example
1032: .vb
1033:  subroutine CompareIntegers(left,right,ctx,result)
1034:    implicit none

1036:    PetscInt,intent(in) :: left, right
1037:    type(UserCtx)       :: ctx
1038:    integer,intent(out) :: result

1040:    if (left < right) then
1041:      result = -1
1042:    else if (left == right) then
1043:      result = 0
1044:    else
1045:      result = 1
1046:    end if
1047:    return
1048:  end subroutine CompareIntegers
1049: .ve

1051:   References:
1052: . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt

1054:   Level: developer

1056: .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1057: @*/
1058: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1059: {
1060:   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1061:   PetscTimSortStack  runstack[128];
1062:   PetscTimSortBuffer abuff, bbuff;
1063:   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1064:    It is so unlikely that this limit is reached that this is __never__ checked for */

1066:   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1067:    is a power of 2 or one plus a power of 2 */
1068:   {
1069:     PetscInt t = n, r = 0;
1070:     /* r becomes 1 if the least significant bits contain at least one off bit */
1071:     while (t >= 64) {
1072:       r |= t & 1;
1073:       t >>= 1;
1074:     }
1075:     minrun = t + r;
1076:   }
1077:   if (PetscDefined(USE_DEBUG)) {
1078:     PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);
1080:   }
1081:   PetscMalloc1((size_t) minrun*asize, &abuff.ptr);
1082:   abuff.size = (size_t) minrun*asize;
1083:   abuff.maxsize = (size_t) n*asize;
1084:   PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);
1085:   bbuff.size = (size_t) minrun*bsize;
1086:   bbuff.maxsize = (size_t) n*bsize;
1087:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1088:   while (runstart < n) {
1089:     /* Check if additional entries are at least partially ordered and build natural run */
1090:     PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1091:     runstack[stacksize].start = runstart;
1092:     runstack[stacksize].size = runend-runstart+1;
1093:     PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1094:     ++stacksize;
1095:     runstart = runend+1;
1096:   }
1097:   /* Have been inside while, so discard last stacksize++ */
1098:   --stacksize;
1099:   PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1100:   PetscFree(abuff.ptr);
1101:   PetscFree(bbuff.ptr);
1102:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1103:   return 0;
1104: }

1106: /*@
1107:    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.

1109:    Not Collective

1111:    Input Parameters:
1112: +  n   - number of values
1113: -  arr - array of integers

1115:    Output Parameters:
1116: .  arr - sorted array of integers

1118:    Notes:
1119:    If the array is less than 64 entries long PetscSortInt() is automatically used.

1121:    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1122:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1123:    recommended that the user benchmark their code to see which routine is fastest.

1125:    Level: intermediate

1127: .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1128: @*/
1129: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1130: {
1131:   if (n <= 1) return 0;
1133:   if (n < 64) {
1134:     PetscSortInt(n, arr);
1135:   } else {
1136:     PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1137:   }
1138:   return 0;
1139: }

1141: /*@
1142:    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1143:    array to match the first.

1145:    Not Collective

1147:    Input Parameter:
1148: .  n   - number of values

1150:    Input/Output Parameters:
1151: +  arr1 - array of integers to be sorted, modified on output
1152: -  arr2 - array of integers to be reordered, modified on output

1154:    Notes:
1155:    The arrays CANNOT overlap.

1157:    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1158:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1159:    recommended that the user benchmark their code to see which routine is fastest.

1161:    Level: intermediate

1163: .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1164: @*/
1165: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1166: {
1167:   if (n <= 1) return 0;
1170:   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1171:   PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1172:   return 0;
1173: }

1175: /*@
1176:    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.

1178:    Not Collective

1180:    Input Parameters:
1181: +  n   - number of values
1182: -  arr - array of PetscMPIInts

1184:    Output Parameters:
1185: .  arr - sorted array of integers

1187:    Notes:
1188:    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.

1190:    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1191:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1192:    recommended that the user benchmark their code to see which routine is fastest.

1194:    Level: intermediate

1196: .seealso: PetscTimSort(), PetscSortMPIInt()
1197: @*/
1198: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1199: {
1200:   if (n <= 1) return 0;
1202:   if (n < 64) {
1203:     PetscSortMPIInt(n, arr);
1204:   } else {
1205:     PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1206:   }
1207:   return 0;
1208: }

1210: /*@
1211:    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1212:    array to match the first.

1214:    Not Collective

1216:    Input Parameter:
1217: .  n   - number of values

1219:    Input/Output Parameters:
1220: .  arr1 - array of integers to be sorted, modified on output
1221: -  arr2 - array of integers to be reordered, modified on output

1223:    Notes:
1224:    The arrays CANNOT overlap.

1226:    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1227:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1228:    recommended that the user benchmark their code to see which routine is fastest.

1230:    Level: intermediate

1232: .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1233: @*/
1234: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1235: {
1236:   if (n <= 1) return 0;
1239:   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1240:   PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1241:   return 0;
1242: }

1244: /*@
1245:    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.

1247:    Not Collective

1249:    Input Parameters:
1250: +  n   - number of values
1251: -  arr - array of PetscReals

1253:    Output Parameters:
1254: .  arr - sorted array of integers

1256:    Notes:
1257:    If the array is less than 64 entries long PetscSortReal() is automatically used.

1259:    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1260:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1261:    recommended that the user benchmark their code to see which routine is fastest.

1263:    Level: intermediate

1265: .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1266: @*/
1267: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1268: {
1269:   if (n <= 1) return 0;
1271:   if (n < 64) {
1272:     PetscSortReal(n, arr);
1273:   } else {
1274:     PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1275:   }
1276:   return 0;
1277: }

1279: /*@
1280:    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1281:    array of PetscInts to match the first.

1283:    Not Collective

1285:    Input Parameter:
1286: .  n   - number of values

1288:    Input/Output Parameters:
1289: .  arr1 - array of PetscReals to be sorted, modified on output
1290: -  arr2 - array of PetscReals to be reordered, modified on output

1292:    Notes:
1293:    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1294:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1295:    recommended that the user benchmark their code to see which routine is fastest.

1297:    Level: intermediate

1299: .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1300: @*/
1301: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1302: {
1303:   if (n <= 1) return 0;
1306:   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1307:   PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1308:   return 0;
1309: }