#include <IntegerReprosum.hpp>

Classes
struct	Metadata

Public Member Functions
	IntegerReprosum (int comm=0)
	Serial constructor; the comm parameter is unused. More...

	~IntegerReprosum ()=default

double	sum (const std::vector< double > &vals) const

double	sum_masked (const std::vector< double > &vals, const std::vector< int > &mask) const

void	sum_masked_batch (const std::vector< std::vector< double > > &fields, const std::vector< int > &mask, std::vector< double > &gsums) const

Private Member Functions
Metadata	compute_metadata (const std::vector< double > &vals, const std::vector< int > *mask) const
	Reduce the per-field local exponent extrema (taken over non-zero values) and the local count of summands (zeros included) across the communicator, then derive arr_max_shift, max_levels and extra_levels. More...

void	encode_local (const std::vector< double > &vals, const std::vector< int > *mask, const Metadata &md, std::vector< int64_t > &iv) const
	Encode local summands into the integer-vector representation. `iv` is sized `max_levels + extra_levels`, indexed so that iv[idx] corresponds to algorithmic level `idx - (extra_levels - 1)`. More...

void	reduce_global (std::vector< int64_t > &iv) const
	MPI_Allreduce(MPI_SUM, MPI_INT64_T) on the integer vector. More...

double	decode_global (const std::vector< int64_t > &iv, const Metadata &md) const
	Reconstruct double from the global integer vector. Faithful port of the decode section of shr_reprosum_int (preprocess for non-overlap and same-sign, truncate at FP-representable boundary, sum the resulting r8 components smallest-to-largest). More...

Static Private Attributes
static constexpr int	kI8Digits = 63
	value bits in int64 (sign bit excluded) More...

static constexpr int	kR8Digits = 53
	mantissa bits in double More...

static constexpr int	kRadix = 2

Detailed Description

Reproducible global summation using the integer-vector algorithm (Worley). One instance per MPI communicator; the sum() / sum_masked() entry points can be called repeatedly on different value vectors.

Definition at line 48 of file IntegerReprosum.hpp.

Constructor & Destructor Documentation

◆ IntegerReprosum()

moab::IntegerReprosum::IntegerReprosum ( int comm = 0 )

explicit

Serial constructor; the comm parameter is unused.

Definition at line 35 of file IntegerReprosum.cpp.

35 {}

◆ ~IntegerReprosum()

moab::IntegerReprosum::~IntegerReprosum ( )

default

Member Function Documentation

◆ compute_metadata()

IntegerReprosum::Metadata moab::IntegerReprosum::compute_metadata	(	const std::vector< double > &	vals,
		const std::vector< int > *	mask
	)		const

private

Reduce the per-field local exponent extrema (taken over non-zero values) and the local count of summands (zeros included) across the communicator, then derive arr_max_shift, max_levels and extra_levels.

Definition at line 97 of file IntegerReprosum.cpp.

 {
     // Purpose: collect the global quantities that size the integer-vector
     // representation for one field: the largest per-rank summand count and
     // the global max/min binary exponents of the non-zero summands. From
     // these, derive arr_max_shift, max_levels and extra_levels.
     //
     // vals : local summands.
     // mask : optional; when non-null, entries with a negative mask value
     //        are ignored. It is indexed with every index of vals.
     //
     // Local extrema (over non-zero values only) and total count of summands
     // (including zeros — matches Fortran shr_reprosum_int's `nsummands`,
     // which is the caller-passed array size, not a non-zero count).
     // The INT_MIN / INT_MAX start values also act as "no non-zero value
     // seen" sentinels.
     int local_max_exp = std::numeric_limits< int >::min();
     int local_min_exp = std::numeric_limits< int >::max();
     int local_count   = 0;

     for( size_t i = 0; i < vals.size(); ++i )
     {
         if( mask && ( *mask )[i] < 0 ) continue;
         ++local_count;  // total owned summands, including zeros
         const double v = vals[i];
         if( v == 0.0 ) continue;
         // Only the binary exponent is used; the returned fraction is dropped.
         // NOTE(review): no finiteness check is visible here — presumably
         // callers guarantee vals holds no Inf/NaN; confirm.
         int e;
         std::frexp( v, &e );
         if( e > local_max_exp ) local_max_exp = e;
         if( e < local_min_exp ) local_min_exp = e;
     }

     // Serial defaults: the global values are the local ones.
     int gmax_exp = local_max_exp;
     int gmin_exp = local_min_exp;
     int gcount   = local_count;

 #ifdef MOAB_HAVE_MPI
     // One Allreduce, three quantities. Use negation trick on local_min_exp
     // so we can take a single MPI_MAX reduction; same trick the Fortran
     // does for arr_lextremes.
     int local_arr[3];
     int global_arr[3];
     local_arr[0] = local_count;       // MPI_MAX over count -> max_nsummands
     local_arr[1] = local_max_exp;     // MPI_MAX over max -> gmax_exp
     local_arr[2] = -local_min_exp;    // MPI_MAX over (-min) -> -gmin_exp
     MPI_Allreduce( local_arr, global_arr, 3, MPI_INT, MPI_MAX, m_comm );
     gcount   = global_arr[0];
     gmax_exp = global_arr[1];
     gmin_exp = -global_arr[2];
 #endif

     // Mirror MCT's all-zero / no-non-zero-summand-anywhere fixup
     // (shr_reprosum_mod.F90 lines 939-941):
     //   arr_gmin_exp = min(arr_gmax_exp, arr_gmin_exp)
     // This collapses the sentinel pair (gmax = INT_MIN, gmin = INT_MAX)
     // to (INT_MIN, INT_MIN) so subsequent arithmetic on (gmax - gmin)
     // doesn't blow up.
     if( gmin_exp > gmax_exp ) gmin_exp = gmax_exp;

     Metadata md;
     md.max_nsummands = gcount;
     md.gmax_exp      = gmax_exp;
     md.gmin_exp      = gmin_exp;

     // Edge case: no unmasked summands on any rank (the count includes
     // zeros, so input that is all zeros but non-empty does NOT take this
     // branch; it continues below with gmax_exp == gmin_exp == INT_MIN).
     // Use arbitrary safe defaults; sum_masked() checks max_nsummands == 0
     // and returns 0 without encoding anything.
     if( md.max_nsummands == 0 )
     {
         md.arr_max_shift = kI8Digits / 4;
         md.max_levels    = 2;
         md.extra_levels  = ( kI8Digits - 1 ) / md.arr_max_shift;
         md.gmax_exp      = 0;
         md.gmin_exp      = 0;
         return md;
     }

     // Conservative bound: account for thread-then-task summation per
     // shr_reprosum_calc lines 800-802 (we have one thread, but keep the
     // identical formula for byte-equivalence with MCT's thread=1 case).
     const int omp_nthreads_local = 1;
     int max_n = ( md.max_nsummands / omp_nthreads_local ) + 1;
 #ifdef MOAB_HAVE_MPI
     // The bound must also be at least the communicator size.
     int nproc = 1;
     MPI_Comm_size( m_comm, &nproc );
     if( max_n < nproc * omp_nthreads_local ) max_n = nproc * omp_nthreads_local;
 #endif

     // arr_max_shift = digits(int64) - (exponent(real(max_n)) + 1)
     //
     // Fortran's exponent(x) for x = f * 2^E with f in [0.5, 1) returns E.
     // C++ frexp(x, &e) gives the same e. The +1 here accounts for the
     // upper bound: max_n < 2^(e+1), so summing max_n integers each less
     // than 2^arr_max_shift in absolute value gives a sum less than
     // 2^(arr_max_shift + e + 1). For this to fit in int64 (without
     // overflow during MPI_Allreduce(MPI_SUM)) we need
     //   arr_max_shift + e + 1 <= digits(int64)
     // hence arr_max_shift = digits(int64) - (e + 1).
     int e_of_max_n;
     std::frexp( static_cast< double >( max_n ), &e_of_max_n );
     md.arr_max_shift = kI8Digits - ( e_of_max_n + 1 );

     if( md.arr_max_shift < 2 )
     {
         // Too many summands. The Fortran aborts here. We do too — caller
         // shouldn't call us with > ~2^60 summands per rank. Returning a
         // bogus value is worse than aborting clearly.
         // NOTE(review): max_n is an `int`; with a 32-bit int frexp yields
         // e <= 31, so arr_max_shift >= 31 and this branch looks
         // unreachable in practice. Presumably kept for parity with the
         // Fortran — confirm.
         std::abort();
     }

     // max_levels = 2 + (digits(double) + (gmax_exp - gmin_exp)) / arr_max_shift
     // (integer division), clamped below at 2.
     md.max_levels = 2 + ( kR8Digits + ( md.gmax_exp - md.gmin_exp ) ) / md.arr_max_shift;
     if( md.max_levels < 2 ) md.max_levels = 2;

     // extra_levels = (digits(int64) - 1) / arr_max_shift
     // (integer division), clamped below at 1.
     md.extra_levels = ( kI8Digits - 1 ) / md.arr_max_shift;
     if( md.extra_levels < 1 ) md.extra_levels = 1;

     return md;
 }

References moab::IntegerReprosum::Metadata::arr_max_shift, moab::IntegerReprosum::Metadata::extra_levels, moab::IntegerReprosum::Metadata::gmax_exp, moab::IntegerReprosum::Metadata::gmin_exp, moab::IntegerReprosum::Metadata::max_levels, and moab::IntegerReprosum::Metadata::max_nsummands.

◆ decode_global()

double moab::IntegerReprosum::decode_global	(	const std::vector< int64_t > &	iv,
		const Metadata &	md
	)		const

private

Reconstruct double from the global integer vector. Faithful port of the decode section of shr_reprosum_int (preprocess for non-overlap and same-sign, truncate at FP-representable boundary, sum the resulting r8 components smallest-to-largest).

Definition at line 331 of file IntegerReprosum.cpp.

 {
     // Purpose: turn the (already globally summed) integer vector back into
     // one double. Works on a private copy of the input, normalises it so
     // the levels do not overlap and share one sign, then converts it to a
     // short list of doubles that are added from the last-produced entry
     // back to the first.
     //
     // NOTE(review): the body reads the input as `iv_in`, while the
     // signature listed in this documentation names the parameter `iv`.
     // Presumably the definition names it `iv_in` — confirm.
     //
     // The helpers i2pow, set_exp and scale2 are defined elsewhere in
     // IntegerReprosum.cpp. They are presumably 2^k as int64, "replace the
     // binary exponent" and "multiply by 2^k" respectively — confirm.

     // No summands anywhere: nothing was encoded, the sum is zero.
     if( md.max_nsummands == 0 ) return 0.0;
     const int max_levels    = md.max_levels;
     const int extra_levels  = md.extra_levels;
     const int arr_max_shift = md.arr_max_shift;
     const int gmax_exp      = md.gmax_exp;
     // Lowest algorithmic level; levels run from min_level (<= 0) to max_levels.
     const int min_level     = -( extra_levels - 1 );

     // Map an algorithmic level to its position in the storage vector,
     // so that min_level lands on index 0.
     auto iv_index = [extra_levels]( int level ) -> int { return level + ( extra_levels - 1 ); };

     // Working copy.
     std::vector< int64_t > iv = iv_in;
     const int64_t shift_factor = i2pow( arr_max_shift );

     // ---- (a)(i) propagate carries to non-overlap (high to low) ------------
     // Starting at the highest level index, any magnitude of at least
     // shift_factor is split: the quotient moves into level - 1 and the
     // current level keeps the remainder.
     for( int level = max_levels; level >= min_level + 1; --level )
     {
         const int idx = iv_index( level );
         if( std::llabs( iv[idx] ) >= shift_factor )
         {
             const int64_t carry = iv[idx] / shift_factor;
             iv[iv_index( level - 1 )] += carry;
             iv[idx] -= carry * shift_factor;
         }
     }

     // Find the first non-zero level (low to high).
     // first_level stays at max_levels when no non-zero level is found.
     int first_level = max_levels;
     for( int level = min_level; level <= max_levels; ++level )
     {
         if( iv[iv_index( level )] != 0 )
         {
             first_level = level;
             break;
         }
     }
     if( first_level == max_levels && iv[iv_index( max_levels )] == 0 )
     {
         // Sum is exactly zero.
         return 0.0;
     }

     // Determine sign of the sum (sign of first non-zero level).
     int64_t sign = ( iv[iv_index( first_level )] < 0 ) ? -1 : 1;

     // ---- (a)(ii) make all components have the same sign ------------------
     // Walking from first_level towards max_levels, one unit (in the
     // direction of `sign`) is taken from level j and given to level j + 1
     // as sign * shift_factor whenever the two signs differ or the next
     // level is zero.
     if( first_level < max_levels )
     {
         for( int j = first_level; j <= max_levels - 1; ++j )
         {
             const int j_idx  = iv_index( j );
             const int j1_idx = iv_index( j + 1 );
             const int64_t s_here = ( iv[j_idx] < 0 ) ? -1 : ( iv[j_idx] > 0 ? 1 : 0 );
             const int64_t s_next = ( iv[j1_idx] < 0 ) ? -1 : ( iv[j1_idx] > 0 ? 1 : 0 );
             // Treat 0 at the next level as "different sign so always
             // borrow", matching the Fortran condition
             //   sign(jlevel) /= sign(jlevel+1) .or. iv(jlevel+1) == 0
             if( s_here != s_next || iv[j1_idx] == 0 )
             {
                 iv[j_idx]  -= sign;
                 iv[j1_idx] += sign * shift_factor;
             }
         }
     }

     // ---- (a)(iii) flip to positive temporarily ---------------------------
     // The sign is re-applied to the final result in step (f).
     if( sign < 0 )
     {
         for( int level = first_level; level <= max_levels; ++level )
             iv[iv_index( level )] = -iv[iv_index( level )];
     }

     // ---- (a)(iv) re-impose non-overlap (carries) -------------------------
     // Same pass as (a)(i); the borrows above can push a level back to
     // shift_factor or beyond.
     for( int level = max_levels; level >= min_level + 1; --level )
     {
         const int idx = iv_index( level );
         if( std::llabs( iv[idx] ) >= shift_factor )
         {
             const int64_t carry = iv[idx] / shift_factor;
             iv[iv_index( level - 1 )] += carry;
             iv[idx] -= carry * shift_factor;
         }
     }

     // ---- (b)(c)(d) iterate: truncate at FP-representable digit, convert
     //                          to FP, append to summand_vector ------------
     std::vector< double > summand_vector;
     summand_vector.reserve( static_cast< size_t >( max_levels + extra_levels ) );

     bool first_iteration = true;
     // Exponent offset applied to the level currently being converted;
     // starts at the value for min_level and is lowered by arr_max_shift
     // each time the conversion moves on to the next level.
     int arr_shift_curr   = gmax_exp - min_level * arr_max_shift;
     // Bits already accounted for in the segment being assembled.
     int digit_count      = 0;
     int begin_level      = min_level;

     while( begin_level <= max_levels )
     {
         // Determine the level at which the cumulative number of integer
         // digits equals or exceeds digits(double) = 53. That's where
         // truncation needs to happen.
         // trunc_loc is the number of low bits of trunc_level that do not
         // fit in this segment (0 means the segment ends on a level boundary).
         int trunc_loc   = 0;
         int trunc_level = max_levels;

         for( int level = begin_level; level <= max_levels; ++level )
         {
             // LX: number of bits this level contributes to the segment.
             int LX;
             if( first_iteration )
             {
                 if( digit_count == 0 )
                 {
                     // Still looking for the leading non-zero level: it
                     // contributes its actual bit length, a zero level
                     // contributes nothing.
                     if( iv[iv_index( level )] != 0 )
                     {
                         const double Xf = static_cast< double >( iv[iv_index( level )] );
                         int e_of_X;
                         std::frexp( Xf, &e_of_X );
                         LX = e_of_X;
                     }
                     else
                     {
                         LX = 0;
                     }
                 }
                 else
                 {
                     LX = arr_max_shift;
                 }
             }
             else
             {
                 // Later segments: a level resumed part-way through has
                 // already been counted via digit_count.
                 if( level == begin_level && digit_count != 0 )
                     LX = 0;
                 else
                     LX = arr_max_shift;
             }

             if( digit_count + LX >= kR8Digits )
             {
                 trunc_level = level;
                 trunc_loc   = ( digit_count + LX ) - kR8Digits;
                 break;
             }
             else
             {
                 digit_count += LX;
             }
         }
         first_iteration = false;

         // Compute the truncated value at trunc_level and the remainder
         // (the bits that didn't fit in digits(double)).
         int64_t trunc_level_rem = 0;
         if( trunc_loc != 0 )
         {
             const int64_t pow_trunc = i2pow( trunc_loc );
             const int64_t kept      = iv[iv_index( trunc_level )] / pow_trunc;
             const int64_t kept_full = kept * pow_trunc;
             trunc_level_rem         = iv[iv_index( trunc_level )] - kept_full;
             iv[iv_index( trunc_level )] = kept_full;
         }

         // Convert truncated integer-vector segment [begin_level..trunc_level]
         // to FP and accumulate into a fresh summand_vector entry.
         double seg_sum = 0.0;
         for( int level = begin_level; level <= trunc_level; ++level )
         {
             const int64_t v = iv[iv_index( level )];
             if( v != 0 )
             {
                 const double Xf = static_cast< double >( v );
                 int e_of_X;
                 std::frexp( Xf, &e_of_X );
                 // Target binary exponent of this level's contribution.
                 const int curr_exp = e_of_X + arr_shift_curr;
                 const int min_exp  = std::numeric_limits< double >::min_exponent;
                 if( curr_exp >= min_exp )
                 {
                     seg_sum += set_exp( Xf, curr_exp );
                 }
                 else
                 {
                     // Subnormal-region scaling: split into two ldexp's so
                     // intermediate stays representable. Mirrors the Fortran
                     // set_exponent + scale combo.
                     const double rxv = set_exp( Xf, curr_exp - min_exp );
                     seg_sum += scale2( rxv, min_exp );
                 }
             }

             // Step the arr_shift down by arr_max_shift unless we're
             // staying at the same trunc_level for the next iteration
             // (which happens when trunc_loc > 0).
             if( level < trunc_level || trunc_loc == 0 )
             {
                 arr_shift_curr -= arr_max_shift;
             }
         }

         summand_vector.push_back( seg_sum );

         if( trunc_loc == 0 )
         {
             // Segment ended on a level boundary: start fresh at the next level.
             digit_count = 0;
             begin_level = trunc_level + 1;
         }
         else
         {
             digit_count = trunc_loc;
             begin_level = trunc_level;
             // The remainder at trunc_level becomes the new starting value
             // for the next iteration.
             iv[iv_index( trunc_level )] = trunc_level_rem;
         }
     }

     // ---- (e) sum smallest to largest -------------------------------------
     // Segments were appended in order of increasing level, so iterating in
     // reverse adds the last-produced segment first.
     double result = 0.0;
     for( auto it = summand_vector.rbegin(); it != summand_vector.rend(); ++it )
         result += *it;

     // ---- (f) restore sign ------------------------------------------------
     if( sign < 0 ) result = -result;

     return result;
 }

References moab::IntegerReprosum::Metadata::arr_max_shift, moab::IntegerReprosum::Metadata::extra_levels, moab::IntegerReprosum::Metadata::gmax_exp, moab::anonymous_namespace{IntegerReprosum.cpp}::i2pow(), moab::IntegerReprosum::Metadata::max_levels, moab::IntegerReprosum::Metadata::max_nsummands, moab::anonymous_namespace{IntegerReprosum.cpp}::scale2(), and moab::anonymous_namespace{IntegerReprosum.cpp}::set_exp().

◆ encode_local()

void moab::IntegerReprosum::encode_local	(	const std::vector< double > &	vals,
		const std::vector< int > *	mask,
		const Metadata &	md,
		std::vector< int64_t > &	iv
	)		const

private

Encode local summands into the integer-vector representation. iv is sized max_levels + extra_levels, indexed so that iv[idx] corresponds to algorithmic level idx - (extra_levels - 1).

Definition at line 221 of file IntegerReprosum.cpp.

 {
     // Purpose: split every local, unmasked, non-zero summand into integer
     // pieces and accumulate them per level into `iv`. `iv` is resized to
     // max_levels + extra_levels and zero-filled here; algorithmic level L
     // is stored at index L + (extra_levels - 1).
     //
     // scale2 and i2pow are defined elsewhere in IntegerReprosum.cpp —
     // presumably "multiply by 2^k" and "2^k as int64"; confirm.
     const int max_levels    = md.max_levels;
     const int extra_levels  = md.extra_levels;
     const int arr_max_shift = md.arr_max_shift;
     const int gmax_exp      = md.gmax_exp;

     iv.assign( max_levels + extra_levels, 0 );
     // No summands on any rank: leave the vector all zero.
     if( md.max_nsummands == 0 ) return;

     auto iv_index = [extra_levels]( int level ) -> int { return level + ( extra_levels - 1 ); };

     for( size_t i = 0; i < vals.size(); ++i )
     {
         // A non-null mask excludes entries whose mask value is negative.
         if( mask && ( *mask )[i] < 0 ) continue;
         const double x = vals[i];
         if( x == 0.0 ) continue;

         // x == arr_frac * 2^arr_exp
         // NOTE(review): no finiteness check is visible — presumably callers
         // guarantee vals holds no Inf/NaN; confirm.
         int arr_exp;
         const double arr_frac = std::frexp( x, &arr_exp );

         // gmax_exp is supposed to be a global upper bound; if a summand
         // exceeds it, the metadata reduction was wrong. Caller bug —
         // skip silently rather than overflow the integer vector.
         if( arr_exp > gmax_exp ) continue;

         // arr_shift: power of two the fraction is scaled by before its
         // integer part is taken at the starting level.
         int arr_shift = arr_max_shift - ( gmax_exp - arr_exp );
         int ilevel;

         if( arr_shift < 1 )
         {
             // The summand is too small for level 1: pick a later level and
             // recompute the shift relative to it, advancing until the
             // shift is at least 1.
             ilevel = ( 1 + ( gmax_exp - arr_exp ) ) / arr_max_shift;
             arr_shift = ilevel * arr_max_shift - ( gmax_exp - arr_exp );
             while( arr_shift < 1 )
             {
                 arr_shift += arr_max_shift;
                 ilevel    += 1;
             }
         }
         else
         {
             ilevel = 1;
         }

         if( ilevel > max_levels ) continue;  // smaller than smallest representable

         // First shift / truncate / accumulate.
         double remainder = scale2( arr_frac, arr_shift );
         int64_t i_part   = static_cast< int64_t >( remainder );  // truncates toward 0
         iv[iv_index( ilevel )] += i_part;
         remainder -= static_cast< double >( i_part );

         // Continue while remainder is non-zero and we still have levels.
         // Each step moves to the next level and rescales the leftover
         // fraction by arr_max_shift, so a higher level index holds
         // less-significant bits. Any remainder left after max_levels is
         // dropped.
         while( remainder != 0.0 && ilevel < max_levels )
         {
             ++ilevel;
             remainder = scale2( remainder, arr_max_shift );
             i_part    = static_cast< int64_t >( remainder );
             iv[iv_index( ilevel )] += i_part;
             remainder -= static_cast< double >( i_part );
         }
     }

     // Postprocess: walk from the highest level index (least significant)
     // down to min_level + 1, moving any overflow beyond shift_factor into
     // level - 1, i.e. the next MORE significant level. Same as the
     // Fortran "(a)" comment block at lines 1410–1432 of
     // shr_reprosum_mod.F90. Required so the integer vector cannot
     // overflow during the subsequent MPI_Allreduce(SUM).
     const int64_t shift_factor = i2pow( arr_max_shift );
     const int min_level        = -( extra_levels - 1 );
     for( int level = max_levels; level >= min_level + 1; --level )
     {
         const int idx = iv_index( level );
         if( std::llabs( iv[idx] ) >= shift_factor )
         {
             const int64_t carry = iv[idx] / shift_factor;
             iv[iv_index( level - 1 )] += carry;
             iv[idx] -= carry * shift_factor;
         }
     }
 }

References moab::IntegerReprosum::Metadata::arr_max_shift, moab::IntegerReprosum::Metadata::extra_levels, moab::IntegerReprosum::Metadata::gmax_exp, moab::anonymous_namespace{IntegerReprosum.cpp}::i2pow(), moab::IntegerReprosum::Metadata::max_levels, moab::IntegerReprosum::Metadata::max_nsummands, and moab::anonymous_namespace{IntegerReprosum.cpp}::scale2().

◆ reduce_global()

void moab::IntegerReprosum::reduce_global ( std::vector< int64_t > & iv ) const

private

MPI_Allreduce(MPI_SUM, MPI_INT64_T) on the integer vector.

Definition at line 310 of file IntegerReprosum.cpp.

 {
 #ifdef MOAB_HAVE_MPI
     if( iv.empty() ) return;
     std::vector< int64_t > out( iv.size() );
     MPI_Allreduce( iv.data(), out.data(), static_cast< int >( iv.size() ), mpi_int64(), MPI_SUM,
                    m_comm );
     iv.swap( out );
 #else
     (void)iv;
 #endif
 }

◆ sum()

double moab::IntegerReprosum::sum ( const std::vector< double > & vals ) const

Compute the global sum of every entry in vals across all ranks on the constructor's MPI communicator. Bit-identical regardless of how vals is partitioned across ranks or iterated locally.

Definition at line 560 of file IntegerReprosum.cpp.

 {
     return sum_masked( vals, std::vector< int >() );
 }

◆ sum_masked()

double moab::IntegerReprosum::sum_masked	(	const std::vector< double > &	vals,
		const std::vector< int > &	mask
	)		const

Compute the global sum of vals[i] only where mask[i] >= 0. Useful for excluding halo / not-owned entries when summing per-rank partial vectors. mask must be the same length as vals; an empty mask disables masking, so every entry of vals is summed.

Definition at line 565 of file IntegerReprosum.cpp.

 {
     const std::vector< int >* mask_ptr = mask.empty() ? nullptr : &mask;
  
     Metadata md = compute_metadata( vals, mask_ptr );
     if( md.max_nsummands == 0 ) return 0.0;
  
     std::vector< int64_t > iv;
     encode_local( vals, mask_ptr, md, iv );
     reduce_global( iv );
     return decode_global( iv, md );
 }

References moab::IntegerReprosum::Metadata::max_nsummands.

◆ sum_masked_batch()

void moab::IntegerReprosum::sum_masked_batch	(	const std::vector< std::vector< double > > &	fields,
		const std::vector< int > &	mask,
		std::vector< double > &	gsums
	)		const

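Convenience batch entry: compute one global sum per input vector while fusing the communication across the batch. Each field still gets its own metadata (gmax/gmin exponent, summand count), but all fields' metadata are reduced in a single MPI_Allreduce and all fields' encoded integer vectors in a second one. Equivalent to calling sum_masked() N times, but with two MPI_Allreduce calls in total instead of two per field. All input vectors must have the same length and use the same mask.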

Definition at line 579 of file IntegerReprosum.cpp.

 {
     // -----------------------------------------------------------------------
     // True batched reproducible sum: one MPI_Allreduce for ALL fields'
     // metadata (gmax_exp / gmin_exp / max_nsummands), one MPI_Allreduce for
     // ALL fields' encoded integer vectors concatenated end-to-end.
     //
     // Bit-for-bit identical to calling sum_masked() once per field: each
     // field still gets its OWN per-field Metadata derived from its OWN
     // global extrema, its OWN encode_local pass, and its OWN decode_global
     // pass on its OWN segment of the concatenated int-vector. Only the
     // network transport is fused. This drops 5 field-reductions from
     //   5 * 2 = 10  collective calls  to  2  collective calls — the
     // per-CAAS-call MPI cost that matters at scale.
     // -----------------------------------------------------------------------
     const size_t N = fields.size();
     gsums.assign( N, 0.0 );
     if( N == 0 ) return;
  
     const std::vector< int >* mask_ptr = mask.empty() ? nullptr : &mask;
  
     // ----- Phase 1: local extrema and count, per field --------------------
     // local_arr layout per field f (3 ints): [count, max_exp, -min_exp]
     // (negation trick so a single MPI_MAX recovers all three.)
     std::vector< int > local_arr( 3 * N, 0 );
     for( size_t f = 0; f < N; ++f )
     {
         const std::vector< double >& vals = fields[f];
         int local_max_exp = std::numeric_limits< int >::min();
         int local_min_exp = std::numeric_limits< int >::max();
         int local_count   = 0;
         for( size_t i = 0; i < vals.size(); ++i )
         {
             if( mask_ptr && ( *mask_ptr )[i] < 0 ) continue;
             ++local_count;
             const double v = vals[i];
             if( v == 0.0 ) continue;
             int e;
             std::frexp( v, &e );
             if( e > local_max_exp ) local_max_exp = e;
             if( e < local_min_exp ) local_min_exp = e;
         }
         local_arr[3 * f + 0] = local_count;
         local_arr[3 * f + 1] = local_max_exp;
         local_arr[3 * f + 2] = -local_min_exp;
     }
  
     // ----- Phase 2: ONE MPI_Allreduce for all fields' metadata ------------
     std::vector< int > global_arr( 3 * N, 0 );
 #ifdef MOAB_HAVE_MPI
     MPI_Allreduce( local_arr.data(), global_arr.data(), static_cast< int >( 3 * N ),
                    MPI_INT, MPI_MAX, m_comm );
 #else
     global_arr = local_arr;
 #endif
  
     // ----- Phase 3: derive per-field Metadata locally (no MPI) -----------
     std::vector< Metadata > mds( N );
     for( size_t f = 0; f < N; ++f )
     {
         Metadata& md = mds[f];
         int gcount   = global_arr[3 * f + 0];
         int gmax_exp = global_arr[3 * f + 1];
         int gmin_exp = -global_arr[3 * f + 2];
  
         // Same all-zero fixup as compute_metadata
         if( gmin_exp > gmax_exp ) gmin_exp = gmax_exp;
  
         md.max_nsummands = gcount;
         md.gmax_exp      = gmax_exp;
         md.gmin_exp      = gmin_exp;
  
         if( md.max_nsummands == 0 )
         {
             md.arr_max_shift = kI8Digits / 4;
             md.max_levels    = 2;
             md.extra_levels  = ( kI8Digits - 1 ) / md.arr_max_shift;
             md.gmax_exp      = 0;
             md.gmin_exp      = 0;
             continue;
         }
  
         // Mirror compute_metadata's derivation byte-for-byte.
         const int omp_nthreads_local = 1;
         int max_n = ( md.max_nsummands / omp_nthreads_local ) + 1;
 #ifdef MOAB_HAVE_MPI
         int nproc = 1;
         MPI_Comm_size( m_comm, &nproc );
         if( max_n < nproc * omp_nthreads_local ) max_n = nproc * omp_nthreads_local;
 #endif
         int e_of_max_n;
         std::frexp( static_cast< double >( max_n ), &e_of_max_n );
         md.arr_max_shift = kI8Digits - ( e_of_max_n + 1 );
         if( md.arr_max_shift < 2 ) std::abort();
         md.max_levels = 2 + ( kR8Digits + ( md.gmax_exp - md.gmin_exp ) ) / md.arr_max_shift;
         if( md.max_levels < 2 ) md.max_levels = 2;
         md.extra_levels = ( kI8Digits - 1 ) / md.arr_max_shift;
         if( md.extra_levels < 1 ) md.extra_levels = 1;
     }
  
     // ----- Phase 4: encode each field locally and concatenate -------------
     std::vector< size_t > offsets( N + 1, 0 );
     for( size_t f = 0; f < N; ++f )
         offsets[f + 1] = offsets[f] + static_cast< size_t >( mds[f].max_levels + mds[f].extra_levels );
     std::vector< int64_t > big_iv( offsets[N], 0 );
  
     for( size_t f = 0; f < N; ++f )
     {
         if( mds[f].max_nsummands == 0 ) continue;
         std::vector< int64_t > iv;
         encode_local( fields[f], mask_ptr, mds[f], iv );
         std::copy( iv.begin(), iv.end(), big_iv.begin() + offsets[f] );
     }
  
     // ----- Phase 5: ONE MPI_Allreduce for all encoded vectors ------------
 #ifdef MOAB_HAVE_MPI
     if( !big_iv.empty() )
     {
         std::vector< int64_t > out( big_iv.size() );
         MPI_Allreduce( big_iv.data(), out.data(), static_cast< int >( big_iv.size() ),
                        mpi_int64(), MPI_SUM, m_comm );
         big_iv.swap( out );
     }
 #endif
  
     // ----- Phase 6: decode each field's segment ---------------------------
     for( size_t f = 0; f < N; ++f )
     {
         if( mds[f].max_nsummands == 0 )
         {
             gsums[f] = 0.0;
             continue;
         }
         std::vector< int64_t > seg( big_iv.begin() + offsets[f], big_iv.begin() + offsets[f + 1] );
         gsums[f] = decode_global( seg, mds[f] );
     }
 }

References moab::IntegerReprosum::Metadata::arr_max_shift, moab::IntegerReprosum::Metadata::extra_levels, moab::IntegerReprosum::Metadata::gmax_exp, moab::IntegerReprosum::Metadata::gmin_exp, moab::IntegerReprosum::Metadata::max_levels, and moab::IntegerReprosum::Metadata::max_nsummands.

Referenced by moab::TempestOnlineMap::ApplyWeightsWithDualMap().

Member Data Documentation

◆ kI8Digits

constexpr int moab::IntegerReprosum::kI8Digits = 63

staticconstexprprivate

value bits in int64 (sign bit excluded)

Definition at line 130 of file IntegerReprosum.hpp.

◆ kR8Digits

constexpr int moab::IntegerReprosum::kR8Digits = 53

staticconstexprprivate

mantissa bits in double

Definition at line 131 of file IntegerReprosum.hpp.

◆ kRadix

constexpr int moab::IntegerReprosum::kRadix = 2

staticconstexprprivate

Definition at line 132 of file IntegerReprosum.hpp.

The documentation for this class was generated from the following files:

Classes

Public Member Functions

Private Member Functions

Static Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ IntegerReprosum()

◆ ~IntegerReprosum()

Member Function Documentation

◆ compute_metadata()

◆ decode_global()

◆ encode_local()

◆ reduce_global()

◆ sum()

◆ sum_masked()

◆ sum_masked_batch()

Member Data Documentation

◆ kI8Digits

◆ kR8Digits

◆ kRadix