Actual source code: sortso.c

  1: #include <petscsys.h>
  2: #include <petsc/private/petscimpl.h>

  4: PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
  5: {
  6:   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
  7:   return (l < r) ? -1 : (l > r);
  8: }

 10: PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
 11: {
 12:   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
 13:   return (l < r) ? -1 : (l > r);
 14: }

 16: PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
 17: {
 18:   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
 19:   return (l < r) ? -1 : (l > r);
 20: }

 22: #define MIN_GALLOP_CONST_GLOBAL 8
 23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
 24: typedef int (*CompFunc)(const void *, const void *, void *);

 26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
 27: #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
 28: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
 29: {
 30:   __builtin_memcpy(t, b, size);
 31:   __builtin_memmove(b, a, size);
 32:   __builtin_memcpy(a, t, size);
 33:   return;
 34: }

 36: PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
 37: {
 38:   __builtin_memcpy(t, ar, asize);
 39:   __builtin_memmove(ar, al, asize);
 40:   __builtin_memcpy(al, t, asize);
 41:   __builtin_memcpy(t, br, bsize);
 42:   __builtin_memmove(br, bl, bsize);
 43:   __builtin_memcpy(bl, t, bsize);
 44:   return;
 45: }

 47: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
 48: {
 49:   __builtin_memcpy(dest, src, size);
 50:   return;
 51: }

 53: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 54: {
 55:   __builtin_memcpy(adest, asrc, asize);
 56:   __builtin_memcpy(bdest, bsrc, bsize);
 57:   return;
 58: }

 60: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
 61: {
 62:   __builtin_memmove(dest, src, size);
 63:   return;
 64: }

 66: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 67: {
 68:   __builtin_memmove(adest, asrc, asize);
 69:   __builtin_memmove(bdest, bsrc, bsize);
 70:   return;
 71: }
 72: # else
 73: PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
 74: {
 77:   PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
 78:   PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
 79:   PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
 80:   PetscFunctionReturnVoid();
 81: }

 83: PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
 84: {
 87:   PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 88:   PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 89:   PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 90:   PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 91:   PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 92:   PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
 93:   PetscFunctionReturnVoid();
 94: }

 96: PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
 97: {
100:   PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101:   PetscFunctionReturnVoid();
102: }

104: PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105: {
108:   PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109:   PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110:   PetscFunctionReturnVoid();
111: }

113: PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114: {
117:   PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118:   PetscFunctionReturnVoid();
119: }

121: PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122: {
125:   PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126:   PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127:   PetscFunctionReturnVoid();
128: }
129: #endif

131: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132:  x. Output also inclusive.

134:  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:

136:    {0,1,2,3,4,5}

138:    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139:   */
140: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141: {
142:   PetscInt last = l, k = 1, mid, cur = l+1;

145:   *m = l;
146:   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147:   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; return(0);}
148:   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) return(0);
149:   while (PETSC_TRUE) {
150:     if (PetscUnlikely(cur > r)) {cur = r; break;}
151:     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152:     last = cur;
153:     cur += (k <<= 1) + 1;
154:     ++k;
155:   }
156:   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157:   while (cur > last + 1) {
158:     mid = last + ((cur - last) >> 1);
159:     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160:       cur = mid;
161:     } else {
162:       last = mid;
163:     }
164:   }
165:   *m = cur;
166:   return(0);
167: }

169: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170:  < x. Output also inclusive */
171: PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172: {
173:   PetscInt last = r, k = 1, mid, cur = r-1;

176:   *m = r;
177:   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178:   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; return(0);}
179:   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) return(0);
180:   while (PETSC_TRUE) {
181:     if (PetscUnlikely(cur < l)) {cur = l; break;}
182:     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183:     last = cur;
184:     cur -= (k <<= 1) + 1;
185:     ++k;
186:   }
187:   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188:   while (last > cur + 1) {
189:     mid = last - ((last - cur) >> 1);
190:     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191:       cur = mid;
192:     } else {
193:       last = mid;
194:     }
195:   }
196:   *m = cur;
197:   return(0);
198: }

200: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201:  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202:  array */
203: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204: {
205:   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206:   const PetscInt llen = mid-left;

210:   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211:   while ((i < llen) && (j <= right)) {
212:     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213:       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214:       ++k;
215:       gallopright = 0;
216:       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217:         PetscInt l1, l2, diff1, diff2;
218:         do {
219:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220:           /* search temp for right[j], can move up to that of temp into arr immediately */
221:           PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
222:           diff1 = l1-i;
223:           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224:           k += diff1;
225:           i = l1;
226:           /* search right for temp[i], can move up to that many of right into arr */
227:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
228:           diff2 = l2-j;
229:           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230:           k += diff2;
231:           j = l2;
232:           if (i >= llen || j > right) break;
233:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234:         ++MIN_GALLOP_GLOBAL;
235:       }
236:     } else {
237:       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238:       ++k;
239:       gallopleft = 0;
240:       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241:         PetscInt l1, l2, diff1, diff2;
242:         do {
243:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244:           /* search right for temp[i], can move up to that many of right into arr */
245:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);
246:           diff2 = l2-j;
247:           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248:           k += diff2;
249:           j = l2;
250:           /* search temp for right[j], can copy up to that of temp into arr immediately */
251:           PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);
252:           diff1 = l1-i;
253:           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254:           k += diff1;
255:           i = l1;
256:           if (i >= llen || j > right) break;
257:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258:         ++MIN_GALLOP_GLOBAL;
259:       }
260:     }
261:   }
262:   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263:   return(0);
264: }

266: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267: {
268:   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269:   const PetscInt llen = mid-left;

273:   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274:   while ((i < llen) && (j <= right)) {
275:     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276:       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277:       ++k;
278:       gallopright = 0;
279:       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280:         PetscInt l1, l2, diff1, diff2;
281:         do {
282:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283:           /* search temp for right[j], can move up to that of temp into arr immediately */
284:           PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
285:           diff1 = l1-i;
286:           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287:           k += diff1;
288:           i = l1;
289:           /* search right for temp[i], can move up to that many of right into arr */
290:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
291:           diff2 = l2-j;
292:           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293:           k += diff2;
294:           j = l2;
295:           if (i >= llen || j > right) break;
296:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297:         ++MIN_GALLOP_GLOBAL;
298:       }
299:     } else {
300:       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301:       ++k;
302:       gallopleft = 0;
303:       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304:         PetscInt l1, l2, diff1, diff2;
305:         do {
306:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307:           /* search right for temp[i], can move up to that many of right into arr */
308:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);
309:           diff2 = l2-j;
310:           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311:           k += diff2;
312:           j = l2;
313:           /* search temp for right[j], can copy up to that of temp into arr immediately */
314:           PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);
315:           diff1 = l1-i;
316:           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317:           k += diff1;
318:           i = l1;
319:           if (i >= llen || j > right) break;
320:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321:         ++MIN_GALLOP_GLOBAL;
322:       }
323:     }
324:   }
325:   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326:   return(0);
327: }

329: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330:  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331:  array */
332: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333: {
334:   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335:   const PetscInt rlen = right-mid+1;

339:   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340:   while ((i >= 0) && (j >= left)) {
341:     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342:       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343:       --k;
344:       gallopleft = 0;
345:       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346:         PetscInt l1, l2, diff1, diff2;
347:         do {
348:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349:           /* search temp for left[j], can copy up to that many of temp into arr */
350:           PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
351:           diff1 = i-l1;
352:           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353:           k -= diff1;
354:           i = l1;
355:           /* search left for temp[i], can move up to that many of left up arr */
356:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
357:           diff2 = j-l2;
358:           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359:           k -= diff2;
360:           j = l2;
361:           if (i < 0 || j < left) break;
362:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363:         ++MIN_GALLOP_GLOBAL;
364:       }
365:     } else {
366:       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367:       --k;
368:       gallopright = 0;
369:       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370:         PetscInt l1, l2, diff1, diff2;
371:         do {
372:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373:           /* search left for temp[i], can move up to that many of left up arr */
374:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);
375:           diff2 = j-l2;
376:           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377:           k -= diff2;
378:           j = l2;
379:           /* search temp for left[j], can copy up to that many of temp into arr */
380:           PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);
381:           diff1 = i-l1;
382:           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383:           k -= diff1;
384:           i = l1;
385:           if (i < 0 || j < left) break;
386:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387:         ++MIN_GALLOP_GLOBAL;
388:       }
389:     }
390:   }
391:   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392:   return(0);
393: }

395: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396: {
397:   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398:   const PetscInt rlen = right-mid+1;

402:   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403:   while ((i >= 0) && (j >= left)) {
404:     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405:       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406:       --k;
407:       gallopleft = 0;
408:       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409:         PetscInt l1, l2, diff1, diff2;
410:         do {
411:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412:           /* search temp for left[j], can copy up to that many of temp into arr */
413:           PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
414:           diff1 = i-l1;
415:           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416:           k -= diff1;
417:           i = l1;
418:           /* search left for temp[i], can move up to that many of left up arr */
419:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
420:           diff2 = j-l2;
421:           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422:           k -= diff2;
423:           j = l2;
424:           if (i < 0 || j < left) break;
425:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426:         ++MIN_GALLOP_GLOBAL;
427:       }
428:     } else {
429:       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430:       --k;
431:       gallopright = 0;
432:       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433:         PetscInt l1, l2, diff1, diff2;
434:         do {
435:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436:           /* search left for temp[i], can move up to that many of left up arr */
437:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);
438:           diff2 = j-l2;
439:           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440:           k -= diff2;
441:           j = l2;
442:           /* search temp for left[j], can copy up to that many of temp into arr */
443:           PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);
444:           diff1 = i-l1;
445:           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446:           k -= diff1;
447:           i = l1;
448:           if (i < 0 || j < left) break;
449:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450:         ++MIN_GALLOP_GLOBAL;
451:       }
452:     }
453:   }
454:   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455:   return(0);
456: }

458: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459:  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461: {
462:   PetscInt i = start == left ? start+1 : start;

465:   for (; i <= right; ++i) {
466:     PetscInt j = i-1;
467:     Petsc_memcpy(tarr, arr+(i*size), size);
468:     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469:       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470:       --j;
471:     }
472:     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473:   }
474:   return(0);
475: }

477: PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478: {
479:   PetscInt i = start == left ? start+1 : start;

482:   for (; i <= right; ++i) {
483:     PetscInt j = i-1;
484:     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485:     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486:       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487:       --j;
488:     }
489:     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490:   }
491:   return(0);
492: }

494: /* See PetscInsertionSort_Private */
495: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496: {
497:   PetscInt i = start == left ? start+1 : start;

500:   for (; i <= right; ++i) {
501:     PetscInt l = left, r = i;
502:     Petsc_memcpy(tarr, arr+(i*size), size);
503:     do {
504:       const PetscInt m = l + ((r - l) >> 1);
505:       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506:         r = m;
507:       } else {
508:         l = m + 1;
509:       }
510:     } while (l < r);
511:     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512:     Petsc_memcpy(arr+(l*size), tarr, size);
513:   }
514:   return(0);
515: }

517: PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518: {
519:   PetscInt i = start == left ? start+1 : start;

522:   for (; i <= right; ++i) {
523:     PetscInt l = left, r = i;
524:     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525:     do {
526:       const PetscInt m = l + ((r - l) >> 1);
527:       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528:         r = m;
529:       } else {
530:         l = m + 1;
531:       }
532:     } while (l < r);
533:     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534:     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535:   }
536:   return(0);
537: }

539: typedef struct {
540:   PetscInt size;
541:   PetscInt start;
542: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));

544: typedef struct {
545:   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546:   size_t size;
547:   size_t maxsize;
548: } PetscTimSortBuffer;

550: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
551: {
553:   if (PetscLikely(newSize <= buff->size)) return(0);
554:   {
555:     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
557:     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
558:     PetscFree(buff->ptr);
559:     PetscMalloc1(newMax, &buff->ptr);
560:     buff->size = newMax;
561:   }
562:   return(0);
563: }

565: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
566: {
568:   for (;stacksize; --stacksize) {
569:     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570:     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
571:       PetscInt       l, m = stack[stacksize].start, r;
573:       /* Search A for B[0] insertion */
574:       PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);
575:       /* Search B for A[-1] insertion */
576:       PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);
577:       if (m-l <= r-m) {
578:         PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
579:         PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
580:       } else {
581:         PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
582:         PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
583:       }
584:     }
585:     /* Update A with merge */
586:     stack[stacksize-1].size += stack[stacksize].size;
587:   }
588:   return(0);
589: }

591: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
592: {
594:   for (;stacksize; --stacksize) {
595:     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596:     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
597:       PetscInt       l, m = stack[stacksize].start, r;
599:       /* Search A for B[0] insertion */
600:       PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);
601:       /* Search B for A[-1] insertion */
602:       PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);
603:       if (m-l <= r-m) {
604:         PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
605:         PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
606:         PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
607:       } else {
608:         PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
609:         PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
610:         PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
611:       }
612:     }
613:     /* Update A with merge */
614:     stack[stacksize-1].size += stack[stacksize].size;
615:   }
616:   return(0);
617: }

619: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
620: {
621:   PetscInt       i = *stacksize;

625:   while (i) {
626:     PetscInt l, m, r, itemp = i;

628:     if (i == 1) {
629:       /* A = stack[i-1], B = stack[i] */
630:       if (stack[i-1].size < stack[i].size) {
631:         /* if A[-1] <= B[0] then sorted */
632:         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
633:           m = stack[i].start;
634:           /* Search A for B[0] insertion */
635:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);
636:           /* Search B for A[-1] insertion */
637:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);
638:           if (m-l <= r-m) {
639:             PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
640:             PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
641:           } else {
642:             PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
643:             PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
644:           }
645:         }
646:         /* Update A with merge */
647:         stack[i-1].size += stack[i].size;
648:         --i;
649:       }
650:     } else {
651:       /* i > 2, i.e. C exists
652:        A = stack[i-2], B = stack[i-1], C = stack[i]; */
653:       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654:         if (stack[i-2].size < stack[i].size) {
655:           /* merge B into A, but if A[-1] <= B[0] then already sorted */
656:           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
657:             m = stack[i-1].start;
658:             /* Search A for B[0] insertion */
659:             PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);
660:             /* Search B for A[-1] insertion */
661:             PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);
662:             if (m-l <= r-m) {
663:               PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
664:               PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
665:             } else {
666:               PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
667:               PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
668:             }
669:           }
670:           /* Update A with merge */
671:           stack[i-2].size += stack[i-1].size;
672:           /* Push C up the stack */
673:           stack[i-1].start = stack[i].start;
674:           stack[i-1].size = stack[i].size;
675:         } else {
676:           /* merge C into B */
677:           mergeBC:
678:           /* If B[-1] <= C[0] then... you know the drill */
679:           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
680:             m = stack[i].start;
681:             /* Search B for C[0] insertion */
682:             PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);
683:             /* Search C for B[-1] insertion */
684:             PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);
685:             if (m-l <= r-m) {
686:               PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);
687:               PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
688:             } else {
689:               PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);
690:               PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
691:             }
692:           }
693:           /* Update B with merge */
694:           stack[i-1].size += stack[i].size;
695:         }
696:         --i;
697:       } else if (stack[i-1].size <= stack[i].size) {
698:         /* merge C into B */
699:         goto mergeBC;
700:       }
701:     }
702:     if (itemp == i) break;
703:   }
704:   *stacksize = i;
705:   return(0);
706: }

708: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
709: {
710:   PetscInt       i = *stacksize;

714:   while (i) {
715:     PetscInt l, m, r, itemp = i;

717:     if (i == 1) {
718:       /* A = stack[i-1], B = stack[i] */
719:       if (stack[i-1].size < stack[i].size) {
720:         /* if A[-1] <= B[0] then sorted */
721:         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
722:           m = stack[i].start;
723:           /* Search A for B[0] insertion */
724:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);
725:           /* Search B for A[-1] insertion */
726:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);
727:           if (m-l <= r-m) {
728:             PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
729:             PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
730:             PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
731:           } else {
732:             PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
733:             PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
734:             PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
735:           }
736:         }
737:         /* Update A with merge */
738:         stack[i-1].size += stack[i].size;
739:         --i;
740:       }
741:     } else {
742:       /* i > 2, i.e. C exists
743:        A = stack[i-2], B = stack[i-1], C = stack[i]; */
744:       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745:         if (stack[i-2].size < stack[i].size) {
746:           /* merge B into A, but if A[-1] <= B[0] then already sorted */
747:           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
748:             m = stack[i-1].start;
749:             /* Search A for B[0] insertion */
750:             PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);
751:             /* Search B for A[-1] insertion */
752:             PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);
753:             if (m-l <= r-m) {
754:               PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
755:               PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
756:               PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
757:             } else {
758:               PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
759:               PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
760:               PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
761:             }
762:           }
763:           /* Update A with merge */
764:           stack[i-2].size += stack[i-1].size;
765:           /* Push C up the stack */
766:           stack[i-1].start = stack[i].start;
767:           stack[i-1].size = stack[i].size;
768:         } else {
769:           /* merge C into B */
770:           mergeBC:
771:           /* If B[-1] <= C[0] then... you know the drill */
772:           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
773:             m = stack[i].start;
774:             /* Search B for C[0] insertion */
775:             PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);
776:             /* Search C for B[-1] insertion */
777:             PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);
778:             if (m-l <= r-m) {
779:               PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);
780:               PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);
781:               PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
782:             } else {
783:               PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);
784:               PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);
785:               PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
786:             }
787:           }
788:           /* Update B with merge */
789:           stack[i-1].size += stack[i].size;
790:         }
791:         --i;
792:       } else if (stack[i-1].size <= stack[i].size) {
793:         /* merge C into B */
794:         goto mergeBC;
795:       }
796:     }
797:     if (itemp == i) break;
798:   }
799:   *stacksize = i;
800:   return(0);
801: }

803: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804:  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805:  binary insertion sort or regulat insertion sort */
806: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
807: {
808:   const PetscInt re = PetscMin(runstart+minrun, n-1);
809:   PetscInt       ri = runstart;

812:   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
813:   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814:   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
815:     ++ri;
816:     while (ri < n-1) {
817:       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
818:       ++ri;
819:     }
820:     {
821:       PetscInt lo = runstart, hi = ri;
822:       for (; lo < hi; ++lo, --hi) {
823:         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824:       }
825:     }
826:   } else {
827:     ++ri;
828:     while (ri < n-1) {
829:       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
830:       ++ri;
831:     }
832:   }
833: #if defined(PETSC_USE_DEBUG)
834:   {
836:     PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
837:   }
838: #endif
839:   if (ri < re) {
840:     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841:      binary search */
842:     if (ri-runstart <= minrun >> 1) {
843:       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844:       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845:     } else {
846:       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
847:     }
848:     *runend = re;
849:   } else *runend = ri;
850:   return(0);
851: }

853: PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
854: {
855:   const PetscInt re = PetscMin(runstart+minrun, n-1);
856:   PetscInt       ri = runstart;

859:   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; return(0);}
860:   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861:   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
862:     ++ri;
863:     while (ri < n-1) {
864:       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
865:       ++ri;
866:     }
867:     {
868:       PetscInt lo = runstart, hi = ri;
869:       for (; lo < hi; ++lo, --hi) {
870:         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871:       }
872:     }
873:   } else {
874:     ++ri;
875:     while (ri < n-1) {
876:       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
877:       ++ri;
878:     }
879:   }
880: #if defined(PETSC_USE_DEBUG)
881:   {
883:     PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);
884:   }
885: #endif
886:   if (ri < re) {
887:     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888:      binary search */
889:     if (ri-runstart <= minrun >> 1) {
890:       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891:       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
892:     } else {
893:       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
894:     }
895:     *runend = re;
896:   } else *runend = ri;
897:   return(0);
898: }

900: /*@C
901:   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.

903:   Not Collective

905:   Input Parameters:
906: + n    - number of values
907: . arr  - array to be sorted
908: . size - size in bytes of the datatype held in arr
909: . cmp  - function pointer to comparison function
910: - ctx  - optional context to be passed to comparison function, NULL if not needed

912:   Output Parameters:
913: . arr  - sorted array

915:   Notes:
916:   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
917:  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
918:  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
919:  do so it repeatedly triggers attempts throughout to merge adjacent runs.

921:   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
922:   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
923:   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
924:   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
925:   likely all contain similar data.

927:   Sample usage:
928:   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
929:   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
930:   may also
931:  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
932:  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
933:   order

935: .vb
936:   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
937:     my_type l = *(my_type *) left, r = *(my_type *) right;
938:     return (l < r) ? -1 : (l > r);
939:   }
940: .ve
941:   Note the context is unused here but you may use it to pass and subsequently access whatever information required
942:   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
943:   Then pass the function
944: .vb
945:   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
946: .ve

948:   Fortran Notes:
949:   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
950:   returns result. For example
951: .vb
952:  subroutine CompareIntegers(left,right,ctx,result)
953:    implicit none

955:    PetscInt,intent(in) :: left, right
956:    type(UserCtx)       :: ctx
957:    integer,intent(out) :: result

959:    if (left < right) then
960:      result = -1
961:    else if (left == right) then
962:      result = 0
963:    else
964:      result = 1
965:    end if
966:    return
967:  end subroutine CompareIntegers
968: .ve

970:   References:
971:   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt

973:   Level: developer

975: .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
976: @*/
977: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
978: {
979:   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
980:   PetscTimSortStack  runstack[128];
981:   PetscTimSortBuffer buff;
982:   PetscErrorCode     ierr;
983:   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
984:    It is so unlikely that this limit is reached that this is __never__ checked for */

987:   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
988:    is a power of 2 or one plus a power of 2 */
989:   {
990:     PetscInt t = n, r = 0;
991:     /* r becomes 1 if the least significant bits contain at least one off bit */
992:     while (t >= 64) {
993:       r |= t & 1;
994:       t >>= 1;
995:     }
996:     minrun = t + r;
997:   }
998:   if (PetscDefined(USE_DEBUG)) {
999:     PetscInfo1(NULL, "minrun = %D\n", minrun);
1000:     if (n < 64) {
1001:       PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);
1002:     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1003:   }
1004:   PetscMalloc1((size_t) minrun*size, &buff.ptr);
1005:   buff.size = (size_t) minrun*size;
1006:   buff.maxsize = (size_t) n*size;
1007:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1008:   while (runstart < n) {
1009:     /* Check if additional entries are at least partially ordered and build natural run */
1010:     PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
1011:     runstack[stacksize].start = runstart;
1012:     runstack[stacksize].size = runend-runstart+1;
1013:     PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
1014:     ++stacksize;
1015:     runstart = runend+1;
1016:   }
1017:   /* Have been inside while, so discard last stacksize++ */
1018:   --stacksize;
1019:   PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
1020:   PetscFree(buff.ptr);
1021:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1022:   return(0);
1023: }

1025: /*@C
1026:   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1027:   reorders a second array to match the first. The arrays need not be the same type.

1029:   Not Collective

1031:   Input Parameters:
1032: + n     - number of values
1033: . arr   - array to be sorted
1034: . asize - size in bytes of the datatype held in arr
1035: . barr  - array to be reordered
1036: . asize - size in bytes of the datatype held in barr
1037: . cmp   - function pointer to comparison function
1038: - ctx   - optional context to be passed to comparison function, NULL if not needed

1040:   Output Parameters:
1041: + arr  - sorted array
1042: - barr - reordered array

1044:   Notes:
1045:   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1046:   overlap.

1048:   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1049:   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1050:  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1051:  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.

1053:   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1054:   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1055:   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1056:   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1057:   likely all contain similar data.

1059:   Sample usage:
1060:   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1061:   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1062:   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1063:   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1064:   increasing order

1066: .vb
1067:   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1068:     my_type l = *(my_type *) left, r = *(my_type *) right;
1069:     return (l < r) ? -1 : (l > r);
1070:   }
1071: .ve
1072:   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1073:   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1074:   Then pass the function
1075: .vb
1076:   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1077: .ve


1080:   Fortran Notes:
1081:   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1082:   returns result. For example
1083: .vb
1084:  subroutine CompareIntegers(left,right,ctx,result)
1085:    implicit none

1087:    PetscInt,intent(in) :: left, right
1088:    type(UserCtx)       :: ctx
1089:    integer,intent(out) :: result

1091:    if (left < right) then
1092:      result = -1
1093:    else if (left == right) then
1094:      result = 0
1095:    else
1096:      result = 1
1097:    end if
1098:    return
1099:  end subroutine CompareIntegers
1100: .ve

1102:   References:
1103:   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt

1105:   Level: developer

1107: .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1108: @*/
1109: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1110: {
1111:   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1112:   PetscTimSortStack  runstack[128];
1113:   PetscTimSortBuffer abuff, bbuff;
1114:   PetscErrorCode     ierr;
1115:   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1116:    It is so unlikely that this limit is reached that this is __never__ checked for */

1119:   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1120:    is a power of 2 or one plus a power of 2 */
1121:   {
1122:     PetscInt t = n, r = 0;
1123:     /* r becomes 1 if the least significant bits contain at least one off bit */
1124:     while (t >= 64) {
1125:       r |= t & 1;
1126:       t >>= 1;
1127:     }
1128:     minrun = t + r;
1129:   }
1130:   if (PetscDefined(USE_DEBUG)) {
1131:     PetscInfo1(NULL, "minrun = %D\n", minrun);
1132:     if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1133:   }
1134:   PetscMalloc1((size_t) minrun*asize, &abuff.ptr);
1135:   abuff.size = (size_t) minrun*asize;
1136:   abuff.maxsize = (size_t) n*asize;
1137:   PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);
1138:   bbuff.size = (size_t) minrun*bsize;
1139:   bbuff.maxsize = (size_t) n*bsize;
1140:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1141:   while (runstart < n) {
1142:     /* Check if additional entries are at least partially ordered and build natural run */
1143:     PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1144:     runstack[stacksize].start = runstart;
1145:     runstack[stacksize].size = runend-runstart+1;
1146:     PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1147:     ++stacksize;
1148:     runstart = runend+1;
1149:   }
1150:   /* Have been inside while, so discard last stacksize++ */
1151:   --stacksize;
1152:   PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1153:   PetscFree(abuff.ptr);
1154:   PetscFree(bbuff.ptr);
1155:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1156:   return(0);
1157: }

1159: /*@
1160:    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.

1162:    Not Collective

1164:    Input Parameters:
1165: +  n   - number of values
1166: -  arr - array of integers

1168:    Output Parameters:
1169: .  arr - sorted array of integers

1171:    Notes:
1172:    If the array is less than 64 entries long PetscSortInt() is automatically used.

1174:    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1175:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1176:    recomended that the user benchmark their code to see which routine is fastest.

1178:    Level: intermediate

1180: .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1181: @*/
1182: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1183: {
1186:   if (n <= 1) return(0);
1188:   if (n < 64) {
1189:     PetscSortInt(n, arr);
1190:   } else {
1191:     PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1192:   }
1193:   return(0);
1194: }

1196: /*@
1197:    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1198:    array to match the first.

1200:    Not Collective

1202:    Input Parameters:
1203: +  n   - number of values
1204: .  arr1 - array of integers to be sorted
1205: -  arr2 - array of integers to be reordered

1207:    Output Parameters:
1208: +  arr1 - sorted array of integers
1209: -  arr2 - reordered array of integers

1211:    Notes:
1212:    The arrays CANNOT overlap.

1214:    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1215:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1216:    recomended that the user benchmark their code to see which routine is fastest.

1218:    Level: intermediate

1220: .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1221: @*/
1222: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1223: {
1226:   if (n <= 1) return(0);
1229:   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1230:   PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1231:   return(0);
1232: }

1234: /*@
1235:    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.

1237:    Not Collective

1239:    Input Parameters:
1240: +  n   - number of values
1241: -  arr - array of PetscMPIInts

1243:    Output Parameters:
1244: .  arr - sorted array of integers

1246:    Notes:
1247:    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.

1249:    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1250:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1251:    recomended that the user benchmark their code to see which routine is fastest.

1253:    Level: intermediate

1255: .seealso: PetscTimSort(), PetscSortMPIInt()
1256: @*/
1257: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1258: {
1259:   PetscErrorCode  ierr;

1262:   if (n <= 1) return(0);
1264:   if (n < 64) {
1265:     PetscSortMPIInt(n, arr);
1266:   } else {
1267:     PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1268:   }
1269:   return(0);
1270: }

1272: /*@
1273:    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1274:    array to match the first.

1276:    Not Collective

1278:    Input Parameters:
1279: +  n   - number of values
1280: .  arr1 - array of integers to be sorted
1281: -  arr2 - array of integers to be reordered

1283:    Output Parameters:
1284: +  arr1 - sorted array of integers
1285: -  arr2 - reordered array of integers

1287:    Notes:
1288:    The arrays CANNOT overlap.

1290:    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1291:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1292:    recomended that the user benchmark their code to see which routine is fastest.

1294:    Level: intermediate

1296: .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1297: @*/
1298: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1299: {
1302:   if (n <= 1) return(0);
1305:   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1306:   PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1307:   return(0);
1308: }

1310: /*@
1311:    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.

1313:    Not Collective

1315:    Input Parameters:
1316: +  n   - number of values
1317: -  arr - array of PetscReals

1319:    Output Parameters:
1320: .  arr - sorted array of integers

1322:    Notes:
1323:    If the array is less than 64 entries long PetscSortReal() is automatically used.

1325:    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1326:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1327:    recomended that the user benchmark their code to see which routine is fastest.

1329:    Level: intermediate

1331: .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1332: @*/
1333: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1334: {
1335:   PetscErrorCode  ierr;

1338:   if (n <= 1) return(0);
1340:   if (n < 64) {
1341:     PetscSortReal(n, arr);
1342:   } else {
1343:     PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1344:   }
1345:   return(0);
1346: }

1348: /*@
1349:    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1350:    array of PetscInts to match the first.

1352:    Not Collective

1354:    Input Parameters:
1355: +  n   - number of values
1356: .  arr1 - array of PetscReals to be sorted
1357: -  arr2 - array of PetscReals to be reordered

1359:    Output Parameters:
1360: +  arr1 - sorted array of PetscReals
1361: -  arr2 - reordered array of PetscInts

1363:    Notes:
1364:    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1365:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1366:    recomended that the user benchmark their code to see which routine is fastest.

1368:    Level: intermediate

1370: .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1371: @*/
1372: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1373: {
1376:   if (n <= 1) return(0);
1379:   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1380:   PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1381:   return(0);
1382: }