Mesh Oriented datABase  (version 5.6.0)
An array-based unstructured mesh library
moab::IntegerReprosum Class Reference

#include <IntegerReprosum.hpp>

Classes

struct  Metadata
 

Public Member Functions

 IntegerReprosum (int comm=0)
 Serial constructor; the comm parameter is unused. More...
 
 ~IntegerReprosum ()=default
 
double sum (const std::vector< double > &vals) const
 
double sum_masked (const std::vector< double > &vals, const std::vector< int > &mask) const
 
void sum_masked_batch (const std::vector< std::vector< double > > &fields, const std::vector< int > &mask, std::vector< double > &gsums) const
 

Private Member Functions

Metadata compute_metadata (const std::vector< double > &vals, const std::vector< int > *mask) const
 Reduce per-field local exponent extrema and the local count of non-zero summands across the comm and derive arr_max_shift, max_levels, extra_levels. More...
 
void encode_local (const std::vector< double > &vals, const std::vector< int > *mask, const Metadata &md, std::vector< int64_t > &iv) const
 Encode local summands into the integer-vector representation. iv is sized max_levels + extra_levels, indexed so that iv[idx] corresponds to algorithmic level idx - (extra_levels - 1). More...
 
void reduce_global (std::vector< int64_t > &iv) const
 MPI_Allreduce(MPI_SUM, MPI_INT64_T) on the integer vector. More...
 
double decode_global (const std::vector< int64_t > &iv, const Metadata &md) const
 Reconstruct double from the global integer vector. Faithful port of the decode section of shr_reprosum_int (preprocess for non- overlap and same-sign, truncate at FP-representable boundary, sum the resulting r8 components smallest-to-largest). More...
 

Static Private Attributes

static constexpr int kI8Digits = 63
 mantissa bits in int64 (sign excluded) More...
 
static constexpr int kR8Digits = 53
 mantissa bits in double More...
 
static constexpr int kRadix = 2
 

Detailed Description

Reproducible global summation using the integer-vector algorithm (Worley). One instance per MPI communicator; the sum() / sum_masked() entry points can be called repeatedly on different value vectors.

Definition at line 48 of file IntegerReprosum.hpp.

Constructor & Destructor Documentation

◆ IntegerReprosum()

moab::IntegerReprosum::IntegerReprosum ( int  comm = 0)
explicit

Serial constructor; the comm parameter is unused.

Definition at line 35 of file IntegerReprosum.cpp.

35 {}

◆ ~IntegerReprosum()

moab::IntegerReprosum::~IntegerReprosum ( )
default

Member Function Documentation

◆ compute_metadata()

IntegerReprosum::Metadata moab::IntegerReprosum::compute_metadata ( const std::vector< double > &  vals,
const std::vector< int > *  mask 
) const
private

Reduce per-field local exponent extrema and the local count of non-zero summands across the comm and derive arr_max_shift, max_levels, extra_levels.

Definition at line 97 of file IntegerReprosum.cpp.

99 {
100  // Local extrema (over non-zero values only) and total count of summands
101  // (including zeros — matches Fortran shr_reprosum_int's `nsummands`,
102  // which is the caller-passed array size, not a non-zero count).
103  int local_max_exp = std::numeric_limits< int >::min();
104  int local_min_exp = std::numeric_limits< int >::max();
105  int local_count = 0;
106 
107  for( size_t i = 0; i < vals.size(); ++i )
108  {
109  if( mask && ( *mask )[i] < 0 ) continue;
110  ++local_count; // total owned summands, including zeros
111  const double v = vals[i];
112  if( v == 0.0 ) continue;
113  int e;
114  std::frexp( v, &e );
115  if( e > local_max_exp ) local_max_exp = e;
116  if( e < local_min_exp ) local_min_exp = e;
117  }
118 
119  int gmax_exp = local_max_exp;
120  int gmin_exp = local_min_exp;
121  int gcount = local_count;
122 
123 #ifdef MOAB_HAVE_MPI
124  // One Allreduce, three quantities. Use negation trick on local_min_exp
125  // so we can take a single MPI_MAX reduction; same trick the Fortran
126  // does for arr_lextremes.
127  int local_arr[3];
128  int global_arr[3];
129  local_arr[0] = local_count; // MPI_MAX over count -> max_nsummands
130  local_arr[1] = local_max_exp; // MPI_MAX over max -> gmax_exp
131  local_arr[2] = -local_min_exp; // MPI_MAX over (-min) -> -gmin_exp
132  MPI_Allreduce( local_arr, global_arr, 3, MPI_INT, MPI_MAX, m_comm );
133  gcount = global_arr[0];
134  gmax_exp = global_arr[1];
135  gmin_exp = -global_arr[2];
136 #endif
137 
138  // Mirror MCT's all-zero / no-non-zero-summand-anywhere fixup
139  // (shr_reprosum_mod.F90 lines 939-941):
140  // arr_gmin_exp = min(arr_gmax_exp, arr_gmin_exp)
141  // This collapses the sentinel pair (gmax = INT_MIN, gmin = INT_MAX)
142  // to (INT_MIN, INT_MIN) so subsequent arithmetic on (gmax - gmin)
143  // doesn't blow up.
144  if( gmin_exp > gmax_exp ) gmin_exp = gmax_exp;
145 
146  Metadata md;
147  md.max_nsummands = gcount;
148  md.gmax_exp = gmax_exp;
149  md.gmin_exp = gmin_exp;
150 
151  // Edge case: all summands zero (or no owned summands anywhere).
152  // Use arbitrary safe defaults; sum() will skip work and return 0.
153  if( md.max_nsummands == 0 )
154  {
155  md.arr_max_shift = kI8Digits / 4;
156  md.max_levels = 2;
157  md.extra_levels = ( kI8Digits - 1 ) / md.arr_max_shift;
158  md.gmax_exp = 0;
159  md.gmin_exp = 0;
160  return md;
161  }
162 
163  // Conservative bound: account for thread-then-task summation per
164  // shr_reprosum_calc lines 800-802 (we have one thread, but keep the
165  // identical formula for byte-equivalence with MCT's thread=1 case).
166  const int omp_nthreads_local = 1;
167  int max_n = ( md.max_nsummands / omp_nthreads_local ) + 1;
168 #ifdef MOAB_HAVE_MPI
169  int nproc = 1;
170  MPI_Comm_size( m_comm, &nproc );
171  if( max_n < nproc * omp_nthreads_local ) max_n = nproc * omp_nthreads_local;
172 #endif
173 
174  // arr_max_shift = digits(int64) - (exponent(real(max_n)) + 1)
175  //
176  // Fortran's exponent(x) for x = f * 2^E with f in [0.5, 1) returns E.
177  // C++ frexp(x, &e) gives the same e. The +1 here accounts for the
178  // upper bound: max_n < 2^(e+1), so summing max_n integers each less
179  // than 2^arr_max_shift in absolute value gives a sum less than
180  // 2^(arr_max_shift + e + 1). For this to fit in int64 (without
181  // overflow during MPI_Allreduce(MPI_SUM)) we need
182  // arr_max_shift + e + 1 <= digits(int64)
183  // hence arr_max_shift = digits(int64) - (e + 1).
184  int e_of_max_n;
185  std::frexp( static_cast< double >( max_n ), &e_of_max_n );
186  md.arr_max_shift = kI8Digits - ( e_of_max_n + 1 );
187 
188  if( md.arr_max_shift < 2 )
189  {
190  // Too many summands. The Fortran aborts here. We do too — caller
191  // shouldn't call us with > ~2^60 summands per rank. Returning a
192  // bogus value is worse than aborting clearly.
193  std::abort();
194  }
195 
196  // max_levels = 2 + (digits(double) + (gmax_exp - gmin_exp)) / arr_max_shift
197  md.max_levels = 2 + ( kR8Digits + ( md.gmax_exp - md.gmin_exp ) ) / md.arr_max_shift;
198  if( md.max_levels < 2 ) md.max_levels = 2;
199 
200  // extra_levels = (digits(int64) - 1) / arr_max_shift
201  md.extra_levels = ( kI8Digits - 1 ) / md.arr_max_shift;
202  if( md.extra_levels < 1 ) md.extra_levels = 1;
203 
204  return md;
205 }

References moab::IntegerReprosum::Metadata::arr_max_shift, moab::IntegerReprosum::Metadata::extra_levels, moab::IntegerReprosum::Metadata::gmax_exp, moab::IntegerReprosum::Metadata::gmin_exp, moab::IntegerReprosum::Metadata::max_levels, and moab::IntegerReprosum::Metadata::max_nsummands.

◆ decode_global()

double moab::IntegerReprosum::decode_global ( const std::vector< int64_t > &  iv,
const Metadata md 
) const
private

Reconstruct double from the global integer vector. Faithful port of the decode section of shr_reprosum_int (preprocess for non- overlap and same-sign, truncate at FP-representable boundary, sum the resulting r8 components smallest-to-largest).

Definition at line 331 of file IntegerReprosum.cpp.

333 {
334  if( md.max_nsummands == 0 ) return 0.0;
335  const int max_levels = md.max_levels;
336  const int extra_levels = md.extra_levels;
337  const int arr_max_shift = md.arr_max_shift;
338  const int gmax_exp = md.gmax_exp;
339  const int min_level = -( extra_levels - 1 );
340 
341  auto iv_index = [extra_levels]( int level ) -> int { return level + ( extra_levels - 1 ); };
342 
343  // Working copy.
344  std::vector< int64_t > iv = iv_in;
345  const int64_t shift_factor = i2pow( arr_max_shift );
346 
347  // ---- (a)(i) propagate carries to non-overlap (high to low) ------------
348  for( int level = max_levels; level >= min_level + 1; --level )
349  {
350  const int idx = iv_index( level );
351  if( std::llabs( iv[idx] ) >= shift_factor )
352  {
353  const int64_t carry = iv[idx] / shift_factor;
354  iv[iv_index( level - 1 )] += carry;
355  iv[idx] -= carry * shift_factor;
356  }
357  }
358 
359  // Find the first non-zero level (low to high).
360  int first_level = max_levels;
361  for( int level = min_level; level <= max_levels; ++level )
362  {
363  if( iv[iv_index( level )] != 0 )
364  {
365  first_level = level;
366  break;
367  }
368  }
369  if( first_level == max_levels && iv[iv_index( max_levels )] == 0 )
370  {
371  // Sum is exactly zero.
372  return 0.0;
373  }
374 
375  // Determine sign of the sum (sign of first non-zero level).
376  int64_t sign = ( iv[iv_index( first_level )] < 0 ) ? -1 : 1;
377 
378  // ---- (a)(ii) make all components have the same sign ------------------
379  if( first_level < max_levels )
380  {
381  for( int j = first_level; j <= max_levels - 1; ++j )
382  {
383  const int j_idx = iv_index( j );
384  const int j1_idx = iv_index( j + 1 );
385  const int64_t s_here = ( iv[j_idx] < 0 ) ? -1 : ( iv[j_idx] > 0 ? 1 : 0 );
386  const int64_t s_next = ( iv[j1_idx] < 0 ) ? -1 : ( iv[j1_idx] > 0 ? 1 : 0 );
387  // Treat 0 at the next level as "different sign so always
388  // borrow", matching the Fortran condition
389  // sign(jlevel) /= sign(jlevel+1) .or. iv(jlevel+1) == 0
390  if( s_here != s_next || iv[j1_idx] == 0 )
391  {
392  iv[j_idx] -= sign;
393  iv[j1_idx] += sign * shift_factor;
394  }
395  }
396  }
397 
398  // ---- (a)(iii) flip to positive temporarily ---------------------------
399  if( sign < 0 )
400  {
401  for( int level = first_level; level <= max_levels; ++level )
402  iv[iv_index( level )] = -iv[iv_index( level )];
403  }
404 
405  // ---- (a)(iv) re-impose non-overlap (carries) -------------------------
406  for( int level = max_levels; level >= min_level + 1; --level )
407  {
408  const int idx = iv_index( level );
409  if( std::llabs( iv[idx] ) >= shift_factor )
410  {
411  const int64_t carry = iv[idx] / shift_factor;
412  iv[iv_index( level - 1 )] += carry;
413  iv[idx] -= carry * shift_factor;
414  }
415  }
416 
417  // ---- (b)(c)(d) iterate: truncate at FP-representable digit, convert
418  // to FP, append to summand_vector ------------
419  std::vector< double > summand_vector;
420  summand_vector.reserve( static_cast< size_t >( max_levels + extra_levels ) );
421 
422  bool first_iteration = true;
423  int arr_shift_curr = gmax_exp - min_level * arr_max_shift;
424  int digit_count = 0;
425  int begin_level = min_level;
426 
427  while( begin_level <= max_levels )
428  {
429  // Determine the level at which the cumulative number of integer
430  // digits equals or exceeds digits(double) = 53. That's where
431  // truncation needs to happen.
432  int trunc_loc = 0;
433  int trunc_level = max_levels;
434 
435  for( int level = begin_level; level <= max_levels; ++level )
436  {
437  int LX;
438  if( first_iteration )
439  {
440  if( digit_count == 0 )
441  {
442  if( iv[iv_index( level )] != 0 )
443  {
444  const double Xf = static_cast< double >( iv[iv_index( level )] );
445  int e_of_X;
446  std::frexp( Xf, &e_of_X );
447  LX = e_of_X;
448  }
449  else
450  {
451  LX = 0;
452  }
453  }
454  else
455  {
456  LX = arr_max_shift;
457  }
458  }
459  else
460  {
461  if( level == begin_level && digit_count != 0 )
462  LX = 0;
463  else
464  LX = arr_max_shift;
465  }
466 
467  if( digit_count + LX >= kR8Digits )
468  {
469  trunc_level = level;
470  trunc_loc = ( digit_count + LX ) - kR8Digits;
471  break;
472  }
473  else
474  {
475  digit_count += LX;
476  }
477  }
478  first_iteration = false;
479 
480  // Compute the truncated value at trunc_level and the remainder
481  // (the bits that didn't fit in digits(double)).
482  int64_t trunc_level_rem = 0;
483  if( trunc_loc != 0 )
484  {
485  const int64_t pow_trunc = i2pow( trunc_loc );
486  const int64_t kept = iv[iv_index( trunc_level )] / pow_trunc;
487  const int64_t kept_full = kept * pow_trunc;
488  trunc_level_rem = iv[iv_index( trunc_level )] - kept_full;
489  iv[iv_index( trunc_level )] = kept_full;
490  }
491 
492  // Convert truncated integer-vector segment [begin_level..trunc_level]
493  // to FP and accumulate into a fresh summand_vector entry.
494  double seg_sum = 0.0;
495  for( int level = begin_level; level <= trunc_level; ++level )
496  {
497  const int64_t v = iv[iv_index( level )];
498  if( v != 0 )
499  {
500  const double Xf = static_cast< double >( v );
501  int e_of_X;
502  std::frexp( Xf, &e_of_X );
503  const int curr_exp = e_of_X + arr_shift_curr;
504  const int min_exp = std::numeric_limits< double >::min_exponent;
505  if( curr_exp >= min_exp )
506  {
507  seg_sum += set_exp( Xf, curr_exp );
508  }
509  else
510  {
511  // Subnormal-region scaling: split into two ldexp's so
512  // intermediate stays representable. Mirrors the Fortran
513  // set_exponent + scale combo.
514  const double rxv = set_exp( Xf, curr_exp - min_exp );
515  seg_sum += scale2( rxv, min_exp );
516  }
517  }
518 
519  // Step the arr_shift down by arr_max_shift unless we're
520  // staying at the same trunc_level for the next iteration
521  // (which happens when trunc_loc > 0).
522  if( level < trunc_level || trunc_loc == 0 )
523  {
524  arr_shift_curr -= arr_max_shift;
525  }
526  }
527 
528  summand_vector.push_back( seg_sum );
529 
530  if( trunc_loc == 0 )
531  {
532  digit_count = 0;
533  begin_level = trunc_level + 1;
534  }
535  else
536  {
537  digit_count = trunc_loc;
538  begin_level = trunc_level;
539  // The remainder at trunc_level becomes the new starting value
540  // for the next iteration.
541  iv[iv_index( trunc_level )] = trunc_level_rem;
542  }
543  }
544 
545  // ---- (e) sum smallest to largest -------------------------------------
546  double result = 0.0;
547  for( auto it = summand_vector.rbegin(); it != summand_vector.rend(); ++it )
548  result += *it;
549 
550  // ---- (f) restore sign ------------------------------------------------
551  if( sign < 0 ) result = -result;
552 
553  return result;
554 }

References moab::IntegerReprosum::Metadata::arr_max_shift, moab::IntegerReprosum::Metadata::extra_levels, moab::IntegerReprosum::Metadata::gmax_exp, moab::anonymous_namespace{IntegerReprosum.cpp}::i2pow(), moab::IntegerReprosum::Metadata::max_levels, moab::IntegerReprosum::Metadata::max_nsummands, moab::anonymous_namespace{IntegerReprosum.cpp}::scale2(), and moab::anonymous_namespace{IntegerReprosum.cpp}::set_exp().

◆ encode_local()

void moab::IntegerReprosum::encode_local ( const std::vector< double > &  vals,
const std::vector< int > *  mask,
const Metadata md,
std::vector< int64_t > &  iv 
) const
private

Encode local summands into the integer-vector representation. iv is sized max_levels + extra_levels, indexed so that iv[idx] corresponds to algorithmic level idx - (extra_levels - 1).

Definition at line 221 of file IntegerReprosum.cpp.

225 {
226  const int max_levels = md.max_levels;
227  const int extra_levels = md.extra_levels;
228  const int arr_max_shift = md.arr_max_shift;
229  const int gmax_exp = md.gmax_exp;
230 
231  iv.assign( max_levels + extra_levels, 0 );
232  if( md.max_nsummands == 0 ) return;
233 
234  auto iv_index = [extra_levels]( int level ) -> int { return level + ( extra_levels - 1 ); };
235 
236  for( size_t i = 0; i < vals.size(); ++i )
237  {
238  if( mask && ( *mask )[i] < 0 ) continue;
239  const double x = vals[i];
240  if( x == 0.0 ) continue;
241 
242  int arr_exp;
243  const double arr_frac = std::frexp( x, &arr_exp );
244 
245  // gmax_exp is supposed to be a global upper bound; if a summand
246  // exceeds it, the metadata reduction was wrong. Caller bug —
247  // skip silently rather than overflow the integer vector.
248  if( arr_exp > gmax_exp ) continue;
249 
250  int arr_shift = arr_max_shift - ( gmax_exp - arr_exp );
251  int ilevel;
252 
253  if( arr_shift < 1 )
254  {
255  ilevel = ( 1 + ( gmax_exp - arr_exp ) ) / arr_max_shift;
256  arr_shift = ilevel * arr_max_shift - ( gmax_exp - arr_exp );
257  while( arr_shift < 1 )
258  {
259  arr_shift += arr_max_shift;
260  ilevel += 1;
261  }
262  }
263  else
264  {
265  ilevel = 1;
266  }
267 
268  if( ilevel > max_levels ) continue; // smaller than smallest representable
269 
270  // First shift / truncate / accumulate.
271  double remainder = scale2( arr_frac, arr_shift );
272  int64_t i_part = static_cast< int64_t >( remainder ); // truncates toward 0
273  iv[iv_index( ilevel )] += i_part;
274  remainder -= static_cast< double >( i_part );
275 
276  // Continue while remainder is non-zero and we still have levels.
277  while( remainder != 0.0 && ilevel < max_levels )
278  {
279  ++ilevel;
280  remainder = scale2( remainder, arr_max_shift );
281  i_part = static_cast< int64_t >( remainder );
282  iv[iv_index( ilevel )] += i_part;
283  remainder -= static_cast< double >( i_part );
284  }
285  }
286 
287  // Postprocess: walk levels high-to-low (most-significant to least),
288  // moving overflow into the lower-significance level. Same as the
289  // Fortran "(a)" comment block at lines 1410–1432 of
290  // shr_reprosum_mod.F90. Required so the integer vector cannot
291  // overflow during the subsequent MPI_Allreduce(SUM).
292  const int64_t shift_factor = i2pow( arr_max_shift );
293  const int min_level = -( extra_levels - 1 );
294  for( int level = max_levels; level >= min_level + 1; --level )
295  {
296  const int idx = iv_index( level );
297  if( std::llabs( iv[idx] ) >= shift_factor )
298  {
299  const int64_t carry = iv[idx] / shift_factor;
300  iv[iv_index( level - 1 )] += carry;
301  iv[idx] -= carry * shift_factor;
302  }
303  }
304 }

References moab::IntegerReprosum::Metadata::arr_max_shift, moab::IntegerReprosum::Metadata::extra_levels, moab::IntegerReprosum::Metadata::gmax_exp, moab::anonymous_namespace{IntegerReprosum.cpp}::i2pow(), moab::IntegerReprosum::Metadata::max_levels, moab::IntegerReprosum::Metadata::max_nsummands, and moab::anonymous_namespace{IntegerReprosum.cpp}::scale2().

◆ reduce_global()

void moab::IntegerReprosum::reduce_global ( std::vector< int64_t > &  iv) const
private

MPI_Allreduce(MPI_SUM, MPI_INT64_T) on the integer vector.

Definition at line 310 of file IntegerReprosum.cpp.

311 {
312 #ifdef MOAB_HAVE_MPI
313  if( iv.empty() ) return;
314  std::vector< int64_t > out( iv.size() );
315  MPI_Allreduce( iv.data(), out.data(), static_cast< int >( iv.size() ), mpi_int64(), MPI_SUM,
316  m_comm );
317  iv.swap( out );
318 #else
319  (void)iv;
320 #endif
321 }

◆ sum()

double moab::IntegerReprosum::sum ( const std::vector< double > &  vals) const

Compute the global sum of every entry in vals across all ranks on the constructor's MPI communicator. Bit-identical regardless of how vals is partitioned across ranks or iterated locally.

Definition at line 560 of file IntegerReprosum.cpp.

561 {
562  return sum_masked( vals, std::vector< int >() );
563 }

◆ sum_masked()

double moab::IntegerReprosum::sum_masked ( const std::vector< double > &  vals,
const std::vector< int > &  mask 
) const

Compute the global sum of vals[i] only where mask[i] >= 0. Useful for excluding halo / not-owned entries when summing per-rank partial vectors. mask must be the same length as vals.

Definition at line 565 of file IntegerReprosum.cpp.

567 {
568  const std::vector< int >* mask_ptr = mask.empty() ? nullptr : &mask;
569 
570  Metadata md = compute_metadata( vals, mask_ptr );
571  if( md.max_nsummands == 0 ) return 0.0;
572 
573  std::vector< int64_t > iv;
574  encode_local( vals, mask_ptr, md, iv );
575  reduce_global( iv );
576  return decode_global( iv, md );
577 }

References moab::IntegerReprosum::Metadata::max_nsummands.

◆ sum_masked_batch()

void moab::IntegerReprosum::sum_masked_batch ( const std::vector< std::vector< double > > &  fields,
const std::vector< int > &  mask,
std::vector< double > &  gsums 
) const

Convenience batch entry: compute one global sum per input vector, sharing the metadata (gmax/gmin exponent) reduction across all fields in the batch. Equivalent to calling sum_masked() N times but with one fewer MPI_Allreduce per field for the metadata. All input vectors must have the same length and use the same mask.

Definition at line 579 of file IntegerReprosum.cpp.

582 {
583  // -----------------------------------------------------------------------
584  // True batched reproducible sum: one MPI_Allreduce for ALL fields'
585  // metadata (gmax_exp / gmin_exp / max_nsummands), one MPI_Allreduce for
586  // ALL fields' encoded integer vectors concatenated end-to-end.
587  //
588  // Bit-for-bit identical to calling sum_masked() once per field: each
589  // field still gets its OWN per-field Metadata derived from its OWN
590  // global extrema, its OWN encode_local pass, and its OWN decode_global
591  // pass on its OWN segment of the concatenated int-vector. Only the
592  // network transport is fused. This drops 5 field-reductions from
593  // 5 * 2 = 10 collective calls to 2 collective calls — the
594  // per-CAAS-call MPI cost that matters at scale.
595  // -----------------------------------------------------------------------
596  const size_t N = fields.size();
597  gsums.assign( N, 0.0 );
598  if( N == 0 ) return;
599 
600  const std::vector< int >* mask_ptr = mask.empty() ? nullptr : &mask;
601 
602  // ----- Phase 1: local extrema and count, per field --------------------
603  // local_arr layout per field f (3 ints): [count, max_exp, -min_exp]
604  // (negation trick so a single MPI_MAX recovers all three.)
605  std::vector< int > local_arr( 3 * N, 0 );
606  for( size_t f = 0; f < N; ++f )
607  {
608  const std::vector< double >& vals = fields[f];
609  int local_max_exp = std::numeric_limits< int >::min();
610  int local_min_exp = std::numeric_limits< int >::max();
611  int local_count = 0;
612  for( size_t i = 0; i < vals.size(); ++i )
613  {
614  if( mask_ptr && ( *mask_ptr )[i] < 0 ) continue;
615  ++local_count;
616  const double v = vals[i];
617  if( v == 0.0 ) continue;
618  int e;
619  std::frexp( v, &e );
620  if( e > local_max_exp ) local_max_exp = e;
621  if( e < local_min_exp ) local_min_exp = e;
622  }
623  local_arr[3 * f + 0] = local_count;
624  local_arr[3 * f + 1] = local_max_exp;
625  local_arr[3 * f + 2] = -local_min_exp;
626  }
627 
628  // ----- Phase 2: ONE MPI_Allreduce for all fields' metadata ------------
629  std::vector< int > global_arr( 3 * N, 0 );
630 #ifdef MOAB_HAVE_MPI
631  MPI_Allreduce( local_arr.data(), global_arr.data(), static_cast< int >( 3 * N ),
632  MPI_INT, MPI_MAX, m_comm );
633 #else
634  global_arr = local_arr;
635 #endif
636 
637  // ----- Phase 3: derive per-field Metadata locally (no MPI) -----------
638  std::vector< Metadata > mds( N );
639  for( size_t f = 0; f < N; ++f )
640  {
641  Metadata& md = mds[f];
642  int gcount = global_arr[3 * f + 0];
643  int gmax_exp = global_arr[3 * f + 1];
644  int gmin_exp = -global_arr[3 * f + 2];
645 
646  // Same all-zero fixup as compute_metadata
647  if( gmin_exp > gmax_exp ) gmin_exp = gmax_exp;
648 
649  md.max_nsummands = gcount;
650  md.gmax_exp = gmax_exp;
651  md.gmin_exp = gmin_exp;
652 
653  if( md.max_nsummands == 0 )
654  {
655  md.arr_max_shift = kI8Digits / 4;
656  md.max_levels = 2;
657  md.extra_levels = ( kI8Digits - 1 ) / md.arr_max_shift;
658  md.gmax_exp = 0;
659  md.gmin_exp = 0;
660  continue;
661  }
662 
663  // Mirror compute_metadata's derivation byte-for-byte.
664  const int omp_nthreads_local = 1;
665  int max_n = ( md.max_nsummands / omp_nthreads_local ) + 1;
666 #ifdef MOAB_HAVE_MPI
667  int nproc = 1;
668  MPI_Comm_size( m_comm, &nproc );
669  if( max_n < nproc * omp_nthreads_local ) max_n = nproc * omp_nthreads_local;
670 #endif
671  int e_of_max_n;
672  std::frexp( static_cast< double >( max_n ), &e_of_max_n );
673  md.arr_max_shift = kI8Digits - ( e_of_max_n + 1 );
674  if( md.arr_max_shift < 2 ) std::abort();
675  md.max_levels = 2 + ( kR8Digits + ( md.gmax_exp - md.gmin_exp ) ) / md.arr_max_shift;
676  if( md.max_levels < 2 ) md.max_levels = 2;
677  md.extra_levels = ( kI8Digits - 1 ) / md.arr_max_shift;
678  if( md.extra_levels < 1 ) md.extra_levels = 1;
679  }
680 
681  // ----- Phase 4: encode each field locally and concatenate -------------
682  std::vector< size_t > offsets( N + 1, 0 );
683  for( size_t f = 0; f < N; ++f )
684  offsets[f + 1] = offsets[f] + static_cast< size_t >( mds[f].max_levels + mds[f].extra_levels );
685  std::vector< int64_t > big_iv( offsets[N], 0 );
686 
687  for( size_t f = 0; f < N; ++f )
688  {
689  if( mds[f].max_nsummands == 0 ) continue;
690  std::vector< int64_t > iv;
691  encode_local( fields[f], mask_ptr, mds[f], iv );
692  std::copy( iv.begin(), iv.end(), big_iv.begin() + offsets[f] );
693  }
694 
695  // ----- Phase 5: ONE MPI_Allreduce for all encoded vectors ------------
696 #ifdef MOAB_HAVE_MPI
697  if( !big_iv.empty() )
698  {
699  std::vector< int64_t > out( big_iv.size() );
700  MPI_Allreduce( big_iv.data(), out.data(), static_cast< int >( big_iv.size() ),
701  mpi_int64(), MPI_SUM, m_comm );
702  big_iv.swap( out );
703  }
704 #endif
705 
706  // ----- Phase 6: decode each field's segment ---------------------------
707  for( size_t f = 0; f < N; ++f )
708  {
709  if( mds[f].max_nsummands == 0 )
710  {
711  gsums[f] = 0.0;
712  continue;
713  }
714  std::vector< int64_t > seg( big_iv.begin() + offsets[f], big_iv.begin() + offsets[f + 1] );
715  gsums[f] = decode_global( seg, mds[f] );
716  }
717 }

References moab::IntegerReprosum::Metadata::arr_max_shift, moab::IntegerReprosum::Metadata::extra_levels, moab::IntegerReprosum::Metadata::gmax_exp, moab::IntegerReprosum::Metadata::gmin_exp, moab::IntegerReprosum::Metadata::max_levels, and moab::IntegerReprosum::Metadata::max_nsummands.

Referenced by moab::TempestOnlineMap::ApplyWeightsWithDualMap().

Member Data Documentation

◆ kI8Digits

constexpr int moab::IntegerReprosum::kI8Digits = 63
staticconstexprprivate

mantissa bits in int64 (sign excluded)

Definition at line 130 of file IntegerReprosum.hpp.

◆ kR8Digits

constexpr int moab::IntegerReprosum::kR8Digits = 53
staticconstexprprivate

mantissa bits in double

Definition at line 131 of file IntegerReprosum.hpp.

◆ kRadix

constexpr int moab::IntegerReprosum::kRadix = 2
staticconstexprprivate

Definition at line 132 of file IntegerReprosum.hpp.


The documentation for this class was generated from the following files: