Actual source code: sortso.c

  1: #include <petscsys.h>
  2: #include <petsc/private/petscimpl.h>

  4: PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
  5: {
  6:   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
  7:   return (l < r) ? -1 : (l > r);
  8: }

 10: PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
 11: {
 12:   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
 13:   return (l < r) ? -1 : (l > r);
 14: }

 16: PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
 17: {
 18:   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
 19:   return (l < r) ? -1 : (l > r);
 20: }

 22: #define MIN_GALLOP_CONST_GLOBAL 8
 23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
 24: typedef int (*CompFunc)(const void *, const void *, void *);

 26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
 27: #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
 28: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
 29: {
 30:   __builtin_memcpy(t, b, size);
 31:   __builtin_memmove(b, a, size);
 32:   __builtin_memcpy(a, t, size);
 33:   return;
 34: }

 36: PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
 37: {
 38:   __builtin_memcpy(t, ar, asize);
 39:   __builtin_memmove(ar, al, asize);
 40:   __builtin_memcpy(al, t, asize);
 41:   __builtin_memcpy(t, br, bsize);
 42:   __builtin_memmove(br, bl, bsize);
 43:   __builtin_memcpy(bl, t, bsize);
 44:   return;
 45: }

 47: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
 48: {
 49:   __builtin_memcpy(dest, src, size);
 50:   return;
 51: }

 53: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 54: {
 55:   __builtin_memcpy(adest, asrc, asize);
 56:   __builtin_memcpy(bdest, bsrc, bsize);
 57:   return;
 58: }

 60: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
 61: {
 62:   __builtin_memmove(dest, src, size);
 63:   return;
 64: }

 66: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 67: {
 68:   __builtin_memmove(adest, asrc, asize);
 69:   __builtin_memmove(bdest, bsrc, bsize);
 70:   return;
 71: }
 72: # else
 73: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
 74: {
 77:   PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
 78:   PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
 79:   PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
 80:   PetscFunctionReturnVoid();
 81: }

 83: PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
 84: {
 87:   PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 88:   PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 89:   PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 90:   PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 91:   PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 92:   PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 93:   PetscFunctionReturnVoid();
 94: }

 96: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
 97: {
100:   PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101:   PetscFunctionReturnVoid();
102: }

104: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105: {
108:   PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109:   PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110:   PetscFunctionReturnVoid();
111: }

113: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114: {
117:   PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118:   PetscFunctionReturnVoid();
119: }

121: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122: {
125:   PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126:   PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127:   PetscFunctionReturnVoid();
128: }
129: #endif

131: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132:  x. Output also inclusive.

134:  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:

136:    {0,1,2,3,4,5}

138:    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139:   */
140: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141: {
142:   PetscInt last = l, k = 1, mid, cur = l+1;

145:   *m = l;
146:   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147:   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; return(0);}
148:   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) return(0);
149:   while (PETSC_TRUE) {
150:     if (PetscUnlikely(cur > r)) {cur = r; break;}
151:     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152:     last = cur;
153:     cur += (k <<= 1) + 1;
154:     ++k;
155:   }
156:   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157:   while (cur > last + 1) {
158:     mid = last + ((cur - last) >> 1);
159:     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160:       cur = mid;
161:     } else {
162:       last = mid;
163:     }
164:   }
165:   *m = cur;
166:   return(0);
167: }

169: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170:  < x. Output also inclusive */
171: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172: {
173:   PetscInt last = r, k = 1, mid, cur = r-1;

176:   *m = r;
177:   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178:   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; return(0);}
179:   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) return(0);
180:   while (PETSC_TRUE) {
181:     if (PetscUnlikely(cur < l)) {cur = l; break;}
182:     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183:     last = cur;
184:     cur -= (k <<= 1) + 1;
185:     ++k;
186:   }
187:   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188:   while (last > cur + 1) {
189:     mid = last - ((last - cur) >> 1);
190:     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191:       cur = mid;
192:     } else {
193:       last = mid;
194:     }
195:   }
196:   *m = cur;
197:   return(0);
198: }

200: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201:  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202:  array */
203: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204: {
205:   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206:   const PetscInt llen = mid-left;

210:   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211:   while ((i < llen) && (j <= right)) {
212:     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213:       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214:       ++k;
215:       gallopright = 0;
216:       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217:         PetscInt l1, l2, diff1, diff2;
218:         do {
219:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220:           /* search temp for right[j], can move up to that of temp into arr immediately */
221:           PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
222:           diff1 = l1-i;
223:           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224:           k += diff1;
225:           i = l1;
226:           /* search right for temp[i], can move up to that many of right into arr */
227:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
228:           diff2 = l2-j;
229:           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230:           k += diff2;
231:           j = l2;
232:           if (i >= llen || j > right) break;
233:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234:         ++MIN_GALLOP_GLOBAL;
235:       }
236:     } else {
237:       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238:       ++k;
239:       gallopleft = 0;
240:       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241:         PetscInt l1, l2, diff1, diff2;
242:         do {
243:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244:           /* search right for temp[i], can move up to that many of right into arr */
245:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
246:           diff2 = l2-j;
247:           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248:           k += diff2;
249:           j = l2;
250:           /* search temp for right[j], can copy up to that of temp into arr immediately */
251:           PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
252:           diff1 = l1-i;
253:           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254:           k += diff1;
255:           i = l1;
256:           if (i >= llen || j > right) break;
257:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258:         ++MIN_GALLOP_GLOBAL;
259:       }
260:     }
261:   }
262:   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263:   return(0);
264: }

266: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267: {
268:   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269:   const PetscInt llen = mid-left;

273:   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274:   while ((i < llen) && (j <= right)) {
275:     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276:       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277:       ++k;
278:       gallopright = 0;
279:       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280:         PetscInt l1, l2, diff1, diff2;
281:         do {
282:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283:           /* search temp for right[j], can move up to that of temp into arr immediately */
284:           PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
285:           diff1 = l1-i;
286:           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287:           k += diff1;
288:           i = l1;
289:           /* search right for temp[i], can move up to that many of right into arr */
290:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
291:           diff2 = l2-j;
292:           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293:           k += diff2;
294:           j = l2;
295:           if (i >= llen || j > right) break;
296:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297:         ++MIN_GALLOP_GLOBAL;
298:       }
299:     } else {
300:       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301:       ++k;
302:       gallopleft = 0;
303:       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304:         PetscInt l1, l2, diff1, diff2;
305:         do {
306:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307:           /* search right for temp[i], can move up to that many of right into arr */
308:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
309:           diff2 = l2-j;
310:           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311:           k += diff2;
312:           j = l2;
313:           /* search temp for right[j], can copy up to that of temp into arr immediately */
314:           PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
315:           diff1 = l1-i;
316:           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317:           k += diff1;
318:           i = l1;
319:           if (i >= llen || j > right) break;
320:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321:         ++MIN_GALLOP_GLOBAL;
322:       }
323:     }
324:   }
325:   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326:   return(0);
327: }

329: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330:  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331:  array */
332: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333: {
334:   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335:   const PetscInt rlen = right-mid+1;

339:   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340:   while ((i >= 0) && (j >= left)) {
341:     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342:       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343:       --k;
344:       gallopleft = 0;
345:       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346:         PetscInt l1, l2, diff1, diff2;
347:         do {
348:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349:           /* search temp for left[j], can copy up to that many of temp into arr */
350:           PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
351:           diff1 = i-l1;
352:           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353:           k -= diff1;
354:           i = l1;
355:           /* search left for temp[i], can move up to that many of left up arr */
356:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
357:           diff2 = j-l2;
358:           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359:           k -= diff2;
360:           j = l2;
361:           if (i < 0 || j < left) break;
362:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363:         ++MIN_GALLOP_GLOBAL;
364:       }
365:     } else {
366:       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367:       --k;
368:       gallopright = 0;
369:       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370:         PetscInt l1, l2, diff1, diff2;
371:         do {
372:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373:           /* search left for temp[i], can move up to that many of left up arr */
374:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
375:           diff2 = j-l2;
376:           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377:           k -= diff2;
378:           j = l2;
379:           /* search temp for left[j], can copy up to that many of temp into arr */
380:           PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
381:           diff1 = i-l1;
382:           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383:           k -= diff1;
384:           i = l1;
385:           if (i < 0 || j < left) break;
386:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387:         ++MIN_GALLOP_GLOBAL;
388:       }
389:     }
390:   }
391:   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392:   return(0);
393: }

395: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396: {
397:   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398:   const PetscInt rlen = right-mid+1;

402:   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403:   while ((i >= 0) && (j >= left)) {
404:     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405:       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406:       --k;
407:       gallopleft = 0;
408:       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409:         PetscInt l1, l2, diff1, diff2;
410:         do {
411:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412:           /* search temp for left[j], can copy up to that many of temp into arr */
413:           PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
414:           diff1 = i-l1;
415:           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416:           k -= diff1;
417:           i = l1;
418:           /* search left for temp[i], can move up to that many of left up arr */
419:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
420:           diff2 = j-l2;
421:           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422:           k -= diff2;
423:           j = l2;
424:           if (i < 0 || j < left) break;
425:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426:         ++MIN_GALLOP_GLOBAL;
427:       }
428:     } else {
429:       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430:       --k;
431:       gallopright = 0;
432:       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433:         PetscInt l1, l2, diff1, diff2;
434:         do {
435:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436:           /* search left for temp[i], can move up to that many of left up arr */
437:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
438:           diff2 = j-l2;
439:           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440:           k -= diff2;
441:           j = l2;
442:           /* search temp for left[j], can copy up to that many of temp into arr */
443:           PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
444:           diff1 = i-l1;
445:           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446:           k -= diff1;
447:           i = l1;
448:           if (i < 0 || j < left) break;
449:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450:         ++MIN_GALLOP_GLOBAL;
451:       }
452:     }
453:   }
454:   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455:   return(0);
456: }

458: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459:  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461: {
462:   PetscInt i = start == left ? start+1 : start;

465:   for (; i <= right; ++i) {
466:     PetscInt j = i-1;
467:     Petsc_memcpy(tarr, arr+(i*size), size);
468:     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469:       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470:       --j;
471:     }
472:     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473:   }
474:   return(0);
475: }

477: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478: {
479:   PetscInt i = start == left ? start+1 : start;

482:   for (; i <= right; ++i) {
483:     PetscInt j = i-1;
484:     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485:     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486:       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487:       --j;
488:     }
489:     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490:   }
491:   return(0);
492: }

494: /* See PetscInsertionSort_Private */
495: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496: {
497:   PetscInt i = start == left ? start+1 : start;

500:   for (; i <= right; ++i) {
501:     PetscInt l = left, r = i;
502:     Petsc_memcpy(tarr, arr+(i*size), size);
503:     do {
504:       const PetscInt m = l + ((r - l) >> 1);
505:       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506:         r = m;
507:       } else {
508:         l = m + 1;
509:       }
510:     } while (l < r);
511:     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512:     Petsc_memcpy(arr+(l*size), tarr, size);
513:   }
514:   return(0);
515: }

517: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518: {
519:   PetscInt i = start == left ? start+1 : start;

522:   for (; i <= right; ++i) {
523:     PetscInt l = left, r = i;
524:     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525:     do {
526:       const PetscInt m = l + ((r - l) >> 1);
527:       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528:         r = m;
529:       } else {
530:         l = m + 1;
531:       }
532:     } while (l < r);
533:     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534:     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535:   }
536:   return(0);
537: }

539: typedef struct {
540:   PetscInt size;
541:   PetscInt start;
542: #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
543: } PetscTimSortStack;
544: #else
545: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
546: #endif

548: typedef struct {
549:   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
550:   size_t size;
551:   size_t maxsize;
552: } PetscTimSortBuffer;

554: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
555: {
557:   if (PetscLikely(newSize <= buff->size)) return(0);
558:   {
559:     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
561:     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
562:     PetscFree(buff->ptr);
563:     PetscMalloc1(newMax, &buff->ptr);
564:     buff->size = newMax;
565:   }
566:   return(0);
567: }

569: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
570: {
572:   for (;stacksize; --stacksize) {
573:     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
574:     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
575:       PetscInt       l, m = stack[stacksize].start, r;
577:       /* Search A for B[0] insertion */
578:       PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);
579:       /* Search B for A[-1] insertion */
580:       PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);
581:       if (m-l <= r-m) {
582:         PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
583:         PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
584:       } else {
585:         PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
586:         PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
587:       }
588:     }
589:     /* Update A with merge */
590:     stack[stacksize-1].size += stack[stacksize].size;
591:   }
592:   return(0);
593: }

595: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
596: {
598:   for (;stacksize; --stacksize) {
599:     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
600:     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
601:       PetscInt       l, m = stack[stacksize].start, r;
603:       /* Search A for B[0] insertion */
604:       PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);
605:       /* Search B for A[-1] insertion */
606:       PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);
607:       if (m-l <= r-m) {
608:         PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
609:         PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
610:         PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
611:       } else {
612:         PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
613:         PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
614:         PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
615:       }
616:     }
617:     /* Update A with merge */
618:     stack[stacksize-1].size += stack[stacksize].size;
619:   }
620:   return(0);
621: }

623: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
624: {
625:   PetscInt       i = *stacksize;

629:   while (i) {
630:     PetscInt l, m, r, itemp = i;

632:     if (i == 1) {
633:       /* A = stack[i-1], B = stack[i] */
634:       if (stack[i-1].size < stack[i].size) {
635:         /* if A[-1] <= B[0] then sorted */
636:         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
637:           m = stack[i].start;
638:           /* Search A for B[0] insertion */
639:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);
640:           /* Search B for A[-1] insertion */
641:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);
642:           if (m-l <= r-m) {
643:             PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
644:             PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
645:           } else {
646:             PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
647:             PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
648:           }
649:         }
650:         /* Update A with merge */
651:         stack[i-1].size += stack[i].size;
652:         --i;
653:       }
654:     } else {
655:       /* i > 2, i.e. C exists
656:        A = stack[i-2], B = stack[i-1], C = stack[i]; */
657:       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
658:         if (stack[i-2].size < stack[i].size) {
659:           /* merge B into A, but if A[-1] <= B[0] then already sorted */
660:           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
661:             m = stack[i-1].start;
662:             /* Search A for B[0] insertion */
663:             PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);
664:             /* Search B for A[-1] insertion */
665:             PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);
666:             if (m-l <= r-m) {
667:               PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
668:               PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
669:             } else {
670:               PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
671:               PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
672:             }
673:           }
674:           /* Update A with merge */
675:           stack[i-2].size += stack[i-1].size;
676:           /* Push C up the stack */
677:           stack[i-1].start = stack[i].start;
678:           stack[i-1].size = stack[i].size;
679:         } else {
680:           /* merge C into B */
681:           mergeBC:
682:           /* If B[-1] <= C[0] then... you know the drill */
683:           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
684:             m = stack[i].start;
685:             /* Search B for C[0] insertion */
686:             PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);
687:             /* Search C for B[-1] insertion */
688:             PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);
689:             if (m-l <= r-m) {
690:               PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
691:               PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
692:             } else {
693:               PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
694:               PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
695:             }
696:           }
697:           /* Update B with merge */
698:           stack[i-1].size += stack[i].size;
699:         }
700:         --i;
701:       } else if (stack[i-1].size <= stack[i].size) {
702:         /* merge C into B */
703:         goto mergeBC;
704:       }
705:     }
706:     if (itemp == i) break;
707:   }
708:   *stacksize = i;
709:   return(0);
710: }

712: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
713: {
714:   PetscInt       i = *stacksize;

718:   while (i) {
719:     PetscInt l, m, r, itemp = i;

721:     if (i == 1) {
722:       /* A = stack[i-1], B = stack[i] */
723:       if (stack[i-1].size < stack[i].size) {
724:         /* if A[-1] <= B[0] then sorted */
725:         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
726:           m = stack[i].start;
727:           /* Search A for B[0] insertion */
728:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);
729:           /* Search B for A[-1] insertion */
730:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);
731:           if (m-l <= r-m) {
732:             PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
733:             PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
734:             PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
735:           } else {
736:             PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
737:             PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
738:             PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
739:           }
740:         }
741:         /* Update A with merge */
742:         stack[i-1].size += stack[i].size;
743:         --i;
744:       }
745:     } else {
746:       /* i > 2, i.e. C exists
747:        A = stack[i-2], B = stack[i-1], C = stack[i]; */
748:       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
749:         if (stack[i-2].size < stack[i].size) {
750:           /* merge B into A, but if A[-1] <= B[0] then already sorted */
751:           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
752:             m = stack[i-1].start;
753:             /* Search A for B[0] insertion */
754:             PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);
755:             /* Search B for A[-1] insertion */
756:             PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);
757:             if (m-l <= r-m) {
758:               PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
759:               PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
760:               PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
761:             } else {
762:               PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
763:               PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
764:               PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
765:             }
766:           }
767:           /* Update A with merge */
768:           stack[i-2].size += stack[i-1].size;
769:           /* Push C up the stack */
770:           stack[i-1].start = stack[i].start;
771:           stack[i-1].size = stack[i].size;
772:         } else {
773:           /* merge C into B */
774:           mergeBC:
775:           /* If B[-1] <= C[0] then... you know the drill */
776:           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
777:             m = stack[i].start;
778:             /* Search B for C[0] insertion */
779:             PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);
780:             /* Search C for B[-1] insertion */
781:             PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);
782:             if (m-l <= r-m) {
783:               PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
784:               PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
785:               PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
786:             } else {
787:               PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
788:               PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
789:               PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
790:             }
791:           }
792:           /* Update B with merge */
793:           stack[i-1].size += stack[i].size;
794:         }
795:         --i;
796:       } else if (stack[i-1].size <= stack[i].size) {
797:         /* merge C into B */
798:         goto mergeBC;
799:       }
800:     }
801:     if (itemp == i) break;
802:   }
803:   *stacksize = i;
804:   return(0);
805: }

807: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
808:  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
809:  binary insertion sort or regulat insertion sort */
810: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
811: {
812:   const PetscInt re = PetscMin(runstart+minrun, n-1);
813:   PetscInt       ri = runstart;

816:   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
817:   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
818:   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
819:     ++ri;
820:     while (ri < n-1) {
821:       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
822:       ++ri;
823:     }
824:     {
825:       PetscInt lo = runstart, hi = ri;
826:       for (; lo < hi; ++lo, --hi) {
827:         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
828:       }
829:     }
830:   } else {
831:     ++ri;
832:     while (ri < n-1) {
833:       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
834:       ++ri;
835:     }
836:   }
837: #if defined(PETSC_USE_DEBUG)
838:   {
840:     PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
841:   }
842: #endif
843:   if (ri < re) {
844:     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
845:      binary search */
846:     if (ri-runstart <= minrun >> 1) {
847:       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
848:       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
849:     } else {
850:       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
851:     }
852:     *runend = re;
853:   } else *runend = ri;
854:   return(0);
855: }

857: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
858: {
859:   const PetscInt re = PetscMin(runstart+minrun, n-1);
860:   PetscInt       ri = runstart;

863:   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
864:   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
865:   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
866:     ++ri;
867:     while (ri < n-1) {
868:       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
869:       ++ri;
870:     }
871:     {
872:       PetscInt lo = runstart, hi = ri;
873:       for (; lo < hi; ++lo, --hi) {
874:         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
875:       }
876:     }
877:   } else {
878:     ++ri;
879:     while (ri < n-1) {
880:       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
881:       ++ri;
882:     }
883:   }
884: #if defined(PETSC_USE_DEBUG)
885:   {
887:     PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
888:   }
889: #endif
890:   if (ri < re) {
891:     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
892:      binary search */
893:     if (ri-runstart <= minrun >> 1) {
894:       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
895:       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
896:     } else {
897:       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
898:     }
899:     *runend = re;
900:   } else *runend = ri;
901:   return(0);
902: }

904: /*@C
905:   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.

907:   Not Collective

909:   Input Parameters:
910: + n    - number of values
911: . arr  - array to be sorted
912: . size - size in bytes of the datatype held in arr
913: . cmp  - function pointer to comparison function
914: - ctx  - optional context to be passed to comparison function, NULL if not needed

916:   Output Parameters:
917: . arr  - sorted array

919:   Notes:
920:   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
921:  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
922:  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
923:  do so it repeatedly triggers attempts throughout to merge adjacent runs.

925:   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
926:   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
927:   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
928:   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
929:   likely all contain similar data.

931:   Sample usage:
932:   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
933:   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
934:   may also
935:  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
936:  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
937:   order

939: .vb
940:   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
941:     my_type l = *(my_type *) left, r = *(my_type *) right;
942:     return (l < r) ? -1 : (l > r);
943:   }
944: .ve
945:   Note the context is unused here but you may use it to pass and subsequently access whatever information required
946:   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
947:   Then pass the function
948: .vb
949:   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
950: .ve

952:   Fortran Notes:
953:   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
954:   returns result. For example
955: .vb
956:  subroutine CompareIntegers(left,right,ctx,result)
957:    implicit none

959:    PetscInt,intent(in) :: left, right
960:    type(UserCtx)       :: ctx
961:    integer,intent(out) :: result

963:    if (left < right) then
964:      result = -1
965:    else if (left == right) then
966:      result = 0
967:    else
968:      result = 1
969:    end if
970:    return
971:  end subroutine CompareIntegers
972: .ve

974:   References:
975:   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt

977:   Level: developer

979: .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
980: @*/
981: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
982: {
983:   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
984:   PetscTimSortStack  runstack[128];
985:   PetscTimSortBuffer buff;
986:   PetscErrorCode     ierr;
987:   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
988:    It is so unlikely that this limit is reached that this is __never__ checked for */

991:   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
992:    is a power of 2 or one plus a power of 2 */
993:   {
994:     PetscInt t = n, r = 0;
995:     /* r becomes 1 if the least significant bits contain at least one off bit */
996:     while (t >= 64) {
997:       r |= t & 1;
998:       t >>= 1;
999:     }
1000:     minrun = t + r;
1001:   }
1002:   if (PetscDefined(USE_DEBUG)) {
1003:     PetscInfo1(NULL, "minrun = %D\n", minrun);
1004:     if (n < 64) {
1005:       PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);
1006:     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1007:   }
1008:   PetscMalloc1((size_t) minrun*size, &buff.ptr);
1009:   buff.size = (size_t) minrun*size;
1010:   buff.maxsize = (size_t) n*size;
1011:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1012:   while (runstart < n) {
1013:     /* Check if additional entries are at least partially ordered and build natural run */
1014:     PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
1015:     runstack[stacksize].start = runstart;
1016:     runstack[stacksize].size = runend-runstart+1;
1017:     PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
1018:     ++stacksize;
1019:     runstart = runend+1;
1020:   }
1021:   /* Have been inside while, so discard last stacksize++ */
1022:   --stacksize;
1023:   PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
1024:   PetscFree(buff.ptr);
1025:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1026:   return(0);
1027: }

1029: /*@C
1030:   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1031:   reorders a second array to match the first. The arrays need not be the same type.

1033:   Not Collective

1035:   Input Parameters:
1036: + n     - number of values
1037: . asize - size in bytes of the datatype held in arr
1038: . bsize - size in bytes of the datatype held in barr
1039: . cmp   - function pointer to comparison function
1040: - ctx   - optional context to be passed to comparison function, NULL if not needed

1042:   Input/Output Parameters:
1043: + arr  - array to be sorted, on output it is sorted
1044: - barr - array to be reordered, on output it is reordered

1046:   Notes:
1047:   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1048:   overlap.

1050:   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1051:   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1052:  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1053:  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.

1055:   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1056:   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1057:   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1058:   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1059:   likely all contain similar data.

1061:   Sample usage:
1062:   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1063:   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1064:   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1065:   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1066:   increasing order

1068: .vb
1069:   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1070:     my_type l = *(my_type *) left, r = *(my_type *) right;
1071:     return (l < r) ? -1 : (l > r);
1072:   }
1073: .ve
1074:   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1075:   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1076:   Then pass the function
1077: .vb
1078:   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1079: .ve

1081:   Fortran Notes:
1082:   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1083:   returns result. For example
1084: .vb
1085:  subroutine CompareIntegers(left,right,ctx,result)
1086:    implicit none

1088:    PetscInt,intent(in) :: left, right
1089:    type(UserCtx)       :: ctx
1090:    integer,intent(out) :: result

1092:    if (left < right) then
1093:      result = -1
1094:    else if (left == right) then
1095:      result = 0
1096:    else
1097:      result = 1
1098:    end if
1099:    return
1100:  end subroutine CompareIntegers
1101: .ve

1103:   References:
1104:   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt

1106:   Level: developer

1108: .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1109: @*/
1110: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1111: {
1112:   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1113:   PetscTimSortStack  runstack[128];
1114:   PetscTimSortBuffer abuff, bbuff;
1115:   PetscErrorCode     ierr;
1116:   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1117:    It is so unlikely that this limit is reached that this is __never__ checked for */

1120:   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1121:    is a power of 2 or one plus a power of 2 */
1122:   {
1123:     PetscInt t = n, r = 0;
1124:     /* r becomes 1 if the least significant bits contain at least one off bit */
1125:     while (t >= 64) {
1126:       r |= t & 1;
1127:       t >>= 1;
1128:     }
1129:     minrun = t + r;
1130:   }
1131:   if (PetscDefined(USE_DEBUG)) {
1132:     PetscInfo1(NULL, "minrun = %D\n", minrun);
1133:     if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1134:   }
1135:   PetscMalloc1((size_t) minrun*asize, &abuff.ptr);
1136:   abuff.size = (size_t) minrun*asize;
1137:   abuff.maxsize = (size_t) n*asize;
1138:   PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);
1139:   bbuff.size = (size_t) minrun*bsize;
1140:   bbuff.maxsize = (size_t) n*bsize;
1141:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1142:   while (runstart < n) {
1143:     /* Check if additional entries are at least partially ordered and build natural run */
1144:     PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1145:     runstack[stacksize].start = runstart;
1146:     runstack[stacksize].size = runend-runstart+1;
1147:     PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1148:     ++stacksize;
1149:     runstart = runend+1;
1150:   }
1151:   /* Have been inside while, so discard last stacksize++ */
1152:   --stacksize;
1153:   PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1154:   PetscFree(abuff.ptr);
1155:   PetscFree(bbuff.ptr);
1156:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1157:   return(0);
1158: }

1160: /*@
1161:    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.

1163:    Not Collective

1165:    Input Parameters:
1166: +  n   - number of values
1167: -  arr - array of integers

1169:    Output Parameters:
1170: .  arr - sorted array of integers

1172:    Notes:
1173:    If the array is less than 64 entries long PetscSortInt() is automatically used.

1175:    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1176:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1177:    recommended that the user benchmark their code to see which routine is fastest.

1179:    Level: intermediate

1181: .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1182: @*/
1183: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1184: {
1187:   if (n <= 1) return(0);
1189:   if (n < 64) {
1190:     PetscSortInt(n, arr);
1191:   } else {
1192:     PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1193:   }
1194:   return(0);
1195: }

1197: /*@
1198:    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1199:    array to match the first.

1201:    Not Collective

1203:    Input Parameter:
1204: .  n   - number of values

1206:    Input/Output Parameters:
1207: +  arr1 - array of integers to be sorted, modified on output
1208: -  arr2 - array of integers to be reordered, modified on output

1210:    Notes:
1211:    The arrays CANNOT overlap.

1213:    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1214:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1215:    recommended that the user benchmark their code to see which routine is fastest.

1217:    Level: intermediate

1219: .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1220: @*/
1221: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1222: {
1225:   if (n <= 1) return(0);
1228:   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1229:   PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1230:   return(0);
1231: }

1233: /*@
1234:    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.

1236:    Not Collective

1238:    Input Parameters:
1239: +  n   - number of values
1240: -  arr - array of PetscMPIInts

1242:    Output Parameters:
1243: .  arr - sorted array of integers

1245:    Notes:
1246:    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.

1248:    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1249:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1250:    recommended that the user benchmark their code to see which routine is fastest.

1252:    Level: intermediate

1254: .seealso: PetscTimSort(), PetscSortMPIInt()
1255: @*/
1256: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1257: {
1258:   PetscErrorCode  ierr;

1261:   if (n <= 1) return(0);
1263:   if (n < 64) {
1264:     PetscSortMPIInt(n, arr);
1265:   } else {
1266:     PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1267:   }
1268:   return(0);
1269: }

1271: /*@
1272:    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1273:    array to match the first.

1275:    Not Collective

1277:    Input Parameter:
1278: .  n   - number of values

1280:    Input/Output Parameters:
1281: .  arr1 - array of integers to be sorted, modified on output
1282: -  arr2 - array of integers to be reordered, modified on output

1284:    Notes:
1285:    The arrays CANNOT overlap.

1287:    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1288:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1289:    recommended that the user benchmark their code to see which routine is fastest.

1291:    Level: intermediate

1293: .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1294: @*/
1295: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1296: {
1299:   if (n <= 1) return(0);
1302:   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1303:   PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1304:   return(0);
1305: }

1307: /*@
1308:    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.

1310:    Not Collective

1312:    Input Parameters:
1313: +  n   - number of values
1314: -  arr - array of PetscReals

1316:    Output Parameters:
1317: .  arr - sorted array of integers

1319:    Notes:
1320:    If the array is less than 64 entries long PetscSortReal() is automatically used.

1322:    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1323:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1324:    recommended that the user benchmark their code to see which routine is fastest.

1326:    Level: intermediate

1328: .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1329: @*/
1330: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1331: {
1332:   PetscErrorCode  ierr;

1335:   if (n <= 1) return(0);
1337:   if (n < 64) {
1338:     PetscSortReal(n, arr);
1339:   } else {
1340:     PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1341:   }
1342:   return(0);
1343: }

1345: /*@
1346:    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1347:    array of PetscInts to match the first.

1349:    Not Collective

1351:    Input Parameter:
1352: .  n   - number of values

1354:    Input/Output Parameters:
1355: .  arr1 - array of PetscReals to be sorted, modified on output
1356: -  arr2 - array of PetscReals to be reordered, modified on output

1358:    Notes:
1359:    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1360:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1361:    recommended that the user benchmark their code to see which routine is fastest.

1363:    Level: intermediate

1365: .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1366: @*/
1367: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1368: {
1371:   if (n <= 1) return(0);
1374:   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1375:   PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1376:   return(0);
1377: }