Mesh Oriented datABase  (version 5.6.0)
An array-based unstructured mesh library
IntegerReprosum.hpp
Go to the documentation of this file.
1 /*
2  * IntegerReprosum.hpp
3  *
4  * Bit-reproducible global sum via Worley's integer-vector algorithm.
5  *
6  * This is a C++ port of E3SM's `shr_reprosum_int` (Fortran) found in
7  * share/util/shr_reprosum_mod.F90. Each FP summand is represented as an
8  * integer vector keyed by exponent levels; the integer vector is then
9  * reduced via MPI_Allreduce on int64_t (genuinely commutative and
10  * associative), and the result is converted back to floating point with
11  * a deterministic reconstruction. The final sum is bit-identical
12  * regardless of:
13  * - MPI rank count
14  * - local iteration order on each rank
15  * - mesh decomposition
16  *
17  * For BFB matching with MCT-coupler runs, set `reprosum_use_ddpdd=.false.`
18  * in `user_nl_cpl` so MCT also uses the integer-vector path (its default).
19  *
20  * References:
21  * P. Worley, "Reproducibility and Performance of MPI-Reduce in MPAS-O,"
22  * Oak Ridge National Laboratory.
23  * E3SM share/util/shr_reprosum_mod.F90 :: shr_reprosum_int
24  *
25  * Author: ported for MOAB CAAS dual-map BFB work.
26  */
27 
28 #ifndef MOAB_INTEGER_REPROSUM_HPP
29 #define MOAB_INTEGER_REPROSUM_HPP
30 
31 #include "moab/MOABConfig.h"
32 
33 #include <vector>
34 #include <cstdint>
35 
36 #ifdef MOAB_HAVE_MPI
37 #include "moab_mpi.h"
38 #endif
39 
40 namespace moab
41 {
42 
43 /**
44  * Reproducible global summation using the integer-vector algorithm
45  * (Worley). One instance per MPI communicator; the sum() / sum_masked()
46  * entry points can be called repeatedly on different value vectors.
47  */
49 {
50  public:
51 #ifdef MOAB_HAVE_MPI
52  explicit IntegerReprosum( MPI_Comm comm );
53 #else
54  /// Serial constructor; the comm parameter is unused.
55  explicit IntegerReprosum( int comm = 0 );
56 #endif
57 
58  ~IntegerReprosum() = default;
59 
60  /**
61  * Compute the global sum of every entry in `vals` across all ranks
62  * on the constructor's MPI communicator. Bit-identical regardless of
63  * how `vals` is partitioned across ranks or iterated locally.
64  */
65  double sum( const std::vector< double >& vals ) const;
66 
67  /**
68  * Compute the global sum of `vals[i]` only where `mask[i] >= 0`.
69  * Useful for excluding halo / not-owned entries when summing per-rank
70  * partial vectors. `mask` must be the same length as `vals`.
71  */
72  double sum_masked( const std::vector< double >& vals,
73  const std::vector< int >& mask ) const;
74 
75  /**
76  * Convenience batch entry: compute one global sum per input vector,
77  * sharing the metadata (gmax/gmin exponent) reduction across all
78  * fields in the batch. Equivalent to calling sum_masked() N times
79  * but with one fewer MPI_Allreduce per field for the metadata.
80  * All input vectors must have the same length and use the same mask.
81  */
82  void sum_masked_batch( const std::vector< std::vector< double > >& fields,
83  const std::vector< int >& mask,
84  std::vector< double >& gsums ) const;
85 
86  private:
87 #ifdef MOAB_HAVE_MPI
88  MPI_Comm m_comm;
89 #endif
90 
91  // === implementation helpers ===
92 
93  struct Metadata
94  {
95  int gmax_exp; ///< global max exponent of any non-zero summand
96  int gmin_exp; ///< global min exponent of any non-zero summand
97  int max_nsummands; ///< MPI_MAX of local count of non-zero summands
98  int arr_max_shift; ///< per-level integer shift; chosen so the level
99  ///< sum cannot overflow int64
100  int max_levels; ///< number of integer-vector levels for this field
101  int extra_levels; ///< padding levels at the high end to absorb
102  ///< overflow during cross-rank summation
103  };
104 
105  /// Reduce per-field local exponent extrema and the local count of
106  /// non-zero summands across the comm and derive arr_max_shift,
107  /// max_levels, extra_levels.
108  Metadata compute_metadata( const std::vector< double >& vals,
109  const std::vector< int >* mask ) const;
110 
111  /// Encode local summands into the integer-vector representation.
112  /// `iv` is sized `max_levels + extra_levels`, indexed so that
113  /// iv[idx] corresponds to algorithmic level `idx - (extra_levels - 1)`.
114  void encode_local( const std::vector< double >& vals,
115  const std::vector< int >* mask,
116  const Metadata& md,
117  std::vector< int64_t >& iv ) const;
118 
119  /// MPI_Allreduce(MPI_SUM, MPI_INT64_T) on the integer vector.
120  void reduce_global( std::vector< int64_t >& iv ) const;
121 
122  /// Reconstruct double from the global integer vector. Faithful port
123  /// of the decode section of shr_reprosum_int (preprocess for non-
124  /// overlap and same-sign, truncate at FP-representable boundary,
125  /// sum the resulting r8 components smallest-to-largest).
126  double decode_global( const std::vector< int64_t >& iv,
127  const Metadata& md ) const;
128 
129  // === constants ===
130  static constexpr int kI8Digits = 63; ///< mantissa bits in int64 (sign excluded)
131  static constexpr int kR8Digits = 53; ///< mantissa bits in double
132  static constexpr int kRadix = 2;
133 };
134 
135 } // namespace moab
136 
137 #endif // MOAB_INTEGER_REPROSUM_HPP