35 IntegerReprosum::IntegerReprosum(
int ) {}
51 return std::frexp( x, &exp_out );
56 inline double scale2(
double x,
int n )
58 return std::ldexp( x, n );
67 double f = std::frexp( x, &dummy );
68 return std::ldexp( f, e );
75 inline MPI_Datatype mpi_int64()
80 return MPI_LONG_LONG_INT;
88 return (
static_cast< int64_t
>( 1 ) << k );
98 const std::vector< int >* mask )
const
103 int local_max_exp = std::numeric_limits< int >::min();
104 int local_min_exp = std::numeric_limits< int >::max();
107 for(
size_t i = 0; i < vals.size(); ++i )
109 if( mask && ( *mask )[i] < 0 )
continue;
111 const double v = vals[i];
112 if( v == 0.0 )
continue;
115 if( e > local_max_exp ) local_max_exp = e;
116 if( e < local_min_exp ) local_min_exp = e;
119 int gmax_exp = local_max_exp;
120 int gmin_exp = local_min_exp;
121 int gcount = local_count;
129 local_arr[0] = local_count;
130 local_arr[1] = local_max_exp;
131 local_arr[2] = -local_min_exp;
132 MPI_Allreduce( local_arr, global_arr, 3, MPI_INT, MPI_MAX, m_comm );
133 gcount = global_arr[0];
134 gmax_exp = global_arr[1];
135 gmin_exp = -global_arr[2];
144 if( gmin_exp > gmax_exp ) gmin_exp = gmax_exp;
166 const int omp_nthreads_local = 1;
170 MPI_Comm_size( m_comm, &nproc );
171 if( max_n < nproc * omp_nthreads_local ) max_n = nproc * omp_nthreads_local;
185 std::frexp(
static_cast< double >( max_n ), &e_of_max_n );
221 void IntegerReprosum::encode_local(
const std::vector< double >& vals,
222 const std::vector< int >* mask,
224 std::vector< int64_t >& iv )
const
231 iv.assign( max_levels + extra_levels, 0 );
234 auto iv_index = [extra_levels](
int level ) ->
int {
return level + ( extra_levels - 1 ); };
236 for(
size_t i = 0; i < vals.size(); ++i )
238 if( mask && ( *mask )[i] < 0 )
continue;
239 const double x = vals[i];
240 if( x == 0.0 )
continue;
243 const double arr_frac = std::frexp( x, &arr_exp );
248 if( arr_exp > gmax_exp )
continue;
250 int arr_shift = arr_max_shift - ( gmax_exp - arr_exp );
255 ilevel = ( 1 + ( gmax_exp - arr_exp ) ) / arr_max_shift;
256 arr_shift = ilevel * arr_max_shift - ( gmax_exp - arr_exp );
257 while( arr_shift < 1 )
259 arr_shift += arr_max_shift;
268 if( ilevel > max_levels )
continue;
271 double remainder =
scale2( arr_frac, arr_shift );
272 int64_t i_part =
static_cast< int64_t
>( remainder );
273 iv[iv_index( ilevel )] += i_part;
274 remainder -=
static_cast< double >( i_part );
277 while( remainder != 0.0 && ilevel < max_levels )
280 remainder =
scale2( remainder, arr_max_shift );
281 i_part =
static_cast< int64_t
>( remainder );
282 iv[iv_index( ilevel )] += i_part;
283 remainder -=
static_cast< double >( i_part );
292 const int64_t shift_factor =
i2pow( arr_max_shift );
293 const int min_level = -( extra_levels - 1 );
294 for(
int level = max_levels; level >= min_level + 1; --level )
296 const int idx = iv_index( level );
297 if( std::llabs( iv[idx] ) >= shift_factor )
299 const int64_t carry = iv[idx] / shift_factor;
300 iv[iv_index( level - 1 )] += carry;
301 iv[idx] -= carry * shift_factor;
310 void IntegerReprosum::reduce_global( std::vector< int64_t >& iv )
const
313 if( iv.empty() )
return;
314 std::vector< int64_t > out( iv.size() );
315 MPI_Allreduce( iv.data(), out.data(),
static_cast< int >( iv.size() ), mpi_int64(), MPI_SUM,
331 double IntegerReprosum::decode_global(
const std::vector< int64_t >& iv_in,
339 const int min_level = -( extra_levels - 1 );
341 auto iv_index = [extra_levels](
int level ) ->
int {
return level + ( extra_levels - 1 ); };
344 std::vector< int64_t > iv = iv_in;
345 const int64_t shift_factor =
i2pow( arr_max_shift );
348 for(
int level = max_levels; level >= min_level + 1; --level )
350 const int idx = iv_index( level );
351 if( std::llabs( iv[idx] ) >= shift_factor )
353 const int64_t carry = iv[idx] / shift_factor;
354 iv[iv_index( level - 1 )] += carry;
355 iv[idx] -= carry * shift_factor;
360 int first_level = max_levels;
361 for(
int level = min_level; level <= max_levels; ++level )
363 if( iv[iv_index( level )] != 0 )
369 if( first_level == max_levels && iv[iv_index( max_levels )] == 0 )
376 int64_t sign = ( iv[iv_index( first_level )] < 0 ) ? -1 : 1;
379 if( first_level < max_levels )
381 for(
int j = first_level; j <= max_levels - 1; ++j )
383 const int j_idx = iv_index( j );
384 const int j1_idx = iv_index( j + 1 );
385 const int64_t s_here = ( iv[j_idx] < 0 ) ? -1 : ( iv[j_idx] > 0 ? 1 : 0 );
386 const int64_t s_next = ( iv[j1_idx] < 0 ) ? -1 : ( iv[j1_idx] > 0 ? 1 : 0 );
390 if( s_here != s_next || iv[j1_idx] == 0 )
393 iv[j1_idx] += sign * shift_factor;
401 for(
int level = first_level; level <= max_levels; ++level )
402 iv[iv_index( level )] = -iv[iv_index( level )];
406 for(
int level = max_levels; level >= min_level + 1; --level )
408 const int idx = iv_index( level );
409 if( std::llabs( iv[idx] ) >= shift_factor )
411 const int64_t carry = iv[idx] / shift_factor;
412 iv[iv_index( level - 1 )] += carry;
413 iv[idx] -= carry * shift_factor;
419 std::vector< double > summand_vector;
420 summand_vector.reserve(
static_cast< size_t >( max_levels + extra_levels ) );
422 bool first_iteration =
true;
423 int arr_shift_curr = gmax_exp - min_level * arr_max_shift;
425 int begin_level = min_level;
427 while( begin_level <= max_levels )
433 int trunc_level = max_levels;
435 for(
int level = begin_level; level <= max_levels; ++level )
438 if( first_iteration )
440 if( digit_count == 0 )
442 if( iv[iv_index( level )] != 0 )
444 const double Xf =
static_cast< double >( iv[iv_index( level )] );
446 std::frexp( Xf, &e_of_X );
461 if( level == begin_level && digit_count != 0 )
467 if( digit_count + LX >= kR8Digits )
470 trunc_loc = ( digit_count + LX ) - kR8Digits;
478 first_iteration =
false;
482 int64_t trunc_level_rem = 0;
485 const int64_t pow_trunc =
i2pow( trunc_loc );
486 const int64_t kept = iv[iv_index( trunc_level )] / pow_trunc;
487 const int64_t kept_full = kept * pow_trunc;
488 trunc_level_rem = iv[iv_index( trunc_level )] - kept_full;
489 iv[iv_index( trunc_level )] = kept_full;
494 double seg_sum = 0.0;
495 for(
int level = begin_level; level <= trunc_level; ++level )
497 const int64_t v = iv[iv_index( level )];
500 const double Xf =
static_cast< double >( v );
502 std::frexp( Xf, &e_of_X );
503 const int curr_exp = e_of_X + arr_shift_curr;
504 const int min_exp = std::numeric_limits< double >::min_exponent;
505 if( curr_exp >= min_exp )
507 seg_sum +=
set_exp( Xf, curr_exp );
514 const double rxv =
set_exp( Xf, curr_exp - min_exp );
515 seg_sum +=
scale2( rxv, min_exp );
522 if( level < trunc_level || trunc_loc == 0 )
524 arr_shift_curr -= arr_max_shift;
528 summand_vector.push_back( seg_sum );
533 begin_level = trunc_level + 1;
537 digit_count = trunc_loc;
538 begin_level = trunc_level;
541 iv[iv_index( trunc_level )] = trunc_level_rem;
547 for(
auto it = summand_vector.rbegin(); it != summand_vector.rend(); ++it )
551 if( sign < 0 ) result = -result;
562 return sum_masked( vals, std::vector< int >() );
565 double IntegerReprosum::sum_masked(
const std::vector< double >& vals,
566 const std::vector< int >& mask )
const
568 const std::vector< int >* mask_ptr = mask.empty() ? nullptr : &mask;
570 Metadata md = compute_metadata( vals, mask_ptr );
573 std::vector< int64_t > iv;
574 encode_local( vals, mask_ptr, md, iv );
576 return decode_global( iv, md );
579 void IntegerReprosum::sum_masked_batch(
const std::vector< std::vector< double > >& fields,
580 const std::vector< int >& mask,
581 std::vector< double >& gsums )
const
596 const size_t N = fields.size();
597 gsums.assign( N, 0.0 );
600 const std::vector< int >* mask_ptr = mask.empty() ? nullptr : &mask;
605 std::vector< int > local_arr( 3 * N, 0 );
606 for(
size_t f = 0; f < N; ++f )
608 const std::vector< double >& vals = fields[f];
609 int local_max_exp = std::numeric_limits< int >::min();
610 int local_min_exp = std::numeric_limits< int >::max();
612 for(
size_t i = 0; i < vals.size(); ++i )
614 if( mask_ptr && ( *mask_ptr )[i] < 0 )
continue;
616 const double v = vals[i];
617 if( v == 0.0 )
continue;
620 if( e > local_max_exp ) local_max_exp = e;
621 if( e < local_min_exp ) local_min_exp = e;
623 local_arr[3 * f + 0] = local_count;
624 local_arr[3 * f + 1] = local_max_exp;
625 local_arr[3 * f + 2] = -local_min_exp;
629 std::vector< int > global_arr( 3 * N, 0 );
631 MPI_Allreduce( local_arr.data(), global_arr.data(),
static_cast< int >( 3 * N ),
632 MPI_INT, MPI_MAX, m_comm );
634 global_arr = local_arr;
638 std::vector< Metadata > mds( N );
639 for(
size_t f = 0; f < N; ++f )
642 int gcount = global_arr[3 * f + 0];
643 int gmax_exp = global_arr[3 * f + 1];
644 int gmin_exp = -global_arr[3 * f + 2];
647 if( gmin_exp > gmax_exp ) gmin_exp = gmax_exp;
664 const int omp_nthreads_local = 1;
668 MPI_Comm_size( m_comm, &nproc );
669 if( max_n < nproc * omp_nthreads_local ) max_n = nproc * omp_nthreads_local;
672 std::frexp(
static_cast< double >( max_n ), &e_of_max_n );
682 std::vector< size_t > offsets( N + 1, 0 );
683 for(
size_t f = 0; f < N; ++f )
684 offsets[f + 1] = offsets[f] +
static_cast< size_t >( mds[f].max_levels + mds[f].extra_levels );
685 std::vector< int64_t > big_iv( offsets[N], 0 );
687 for(
size_t f = 0; f < N; ++f )
689 if( mds[f].max_nsummands == 0 )
continue;
690 std::vector< int64_t > iv;
691 encode_local( fields[f], mask_ptr, mds[f], iv );
692 std::copy( iv.begin(), iv.end(), big_iv.begin() + offsets[f] );
697 if( !big_iv.empty() )
699 std::vector< int64_t > out( big_iv.size() );
700 MPI_Allreduce( big_iv.data(), out.data(),
static_cast< int >( big_iv.size() ),
701 mpi_int64(), MPI_SUM, m_comm );
707 for(
size_t f = 0; f < N; ++f )
709 if( mds[f].max_nsummands == 0 )
714 std::vector< int64_t > seg( big_iv.begin() + offsets[f], big_iv.begin() + offsets[f + 1] );
715 gsums[f] = decode_global( seg, mds[f] );