[treeplayer] Report integer Scan values that cannot be printed exactly

guitargeek · guitargeek · commit 27cbd7b22e88 · 2026-06-15T23:33:57.000+02:00
Even evaluated through long double, an integer column can still exceed the accumulator's exactly-representable range: above 2^53 where long double is merely a 64-bit double (e.g. macOS ARM), or beyond 2^64 for results that overflow. As explained in the preceding commit, this cannot be fixed without changing TTreeFormula's frozen floating-point arithmetic, so it is a genuine known limitation, not a bug to work around -- but it must not pass silently. TTreeFormula::PrintValue now emits an error whenever the value it is about to print as an integer has reached the point where the long double accumulator can no longer hold every integer exactly, so the printed digits may be rounded. The diagnostic is deliberately a kError, issued for every offending value with no deduplication: silently printing a wrong integer is precisely the trap we want to surface as loudly as possible, even on a many-row Scan. The threshold check is inclusive (>= 2^digits) on purpose: a rounded result can land back exactly on the threshold (e.g. 2^53 + 1 -> 2^53 in a 53-bit type), and an inclusive comparison avoids missing those. Closes #7844. 🤖 Done with the help of [Claude Code](https://claude.com/claude-code) (Claude Opus 4.8)
diff --git a/tree/treeplayer/src/TTreeFormula.cxx b/tree/treeplayer/src/TTreeFormula.cxx
@@ -49,6 +49,7 @@
 #include <cstdio>
 #include <cmath>
 #include <cstdlib>
+#include <limits>
 #include <typeinfo>
 #include <algorithm>
 #include <sstream>
@@ -4033,6 +4034,35 @@ template <> Long64_t fmod_local(Long64_t x, Long64_t y) { return fmod((LongDoubl
 template<typename T> inline void SetMethodParam(TMethodCall *method, T p) { method->SetParam(p); }
 template<> void SetMethodParam(TMethodCall *method, LongDouble_t p) { method->SetParam((Double_t)p); }
 
+// Diagnostic helper for TTreeFormula::PrintValue (and therefore TTree::Scan).
+// Integer columns printed with the "l" and "ll" length modifiers are evaluated
+// in a `long double`, which stores every integer exactly only below 2^digits,
+// where digits is its mantissa width (64 on x86, 53 where `long double` is just
+// a 64-bit `double`, e.g. macOS ARM). Larger values come out rounded, and since
+// TTreeFormula's floating-point arithmetic is frozen this can only be reported,
+// not fixed: https://github.com/root-project/root/issues/7844.
+//   evaluated  - the value PrintValue is about to cast to an integer and print
+//   expression - text naming the formula in the message (callers pass GetTitle())
+inline void CheckIntegerPrintPrecision(LongDouble_t evaluated, const char *expression)
+{
+   constexpr int mantissaBits = std::numeric_limits<LongDouble_t>::digits;
+   // First magnitude (2^digits) at which neighbouring integers stop being distinct.
+   const LongDouble_t firstInexact = std::ldexp(1.0L, mantissaBits);
+   // The bound is inclusive because a rounded result can fall exactly onto it
+   // (2^53 + 1 becomes 2^53 with a 53-bit mantissa). The test is kept in the
+   // negated ">=" form so that NaN, which compares false, stays unreported.
+   if (!(std::fabs(evaluated) >= firstInexact))
+      return;
+   // One kError per offending value, on purpose and without deduplication: a
+   // silently wrong integer must be impossible to overlook, even on a long Scan.
+   ::Error("TTreeFormula::PrintValue",
+           "the integer value of \"%s\" may be inexact: its magnitude reaches 2^%d, "
+           "the point beyond which the long double used to evaluate it can no longer "
+           "represent every integer exactly, so the printed value may be rounded. "
+           "This is a known limitation "
+           "(https://github.com/root-project/root/issues/7844).",
+           expression, mantissaBits);
+}
 }
 
 template<typename T> inline T TTreeFormula::GetConstant(Int_t k) { return fConst[k]; }
@@ -5072,8 +5102,22 @@ char *TTreeFormula::PrintValue(Int_t mode, Int_t instance, const char *decform)
                {
                   switch (outputSizeLevel) {
                      case 0:  snprintf(value,kMAXLENGTH,Form("%%%s",decform),(Short_t)((TTreeFormula*)this)->EvalInstance(instance)); break;
-                     case 2:  snprintf(value,kMAXLENGTH,Form("%%%s",decform),(Long_t)((TTreeFormula*)this)->EvalInstance<LongDouble_t>(instance)); break;
-                     case 3:  snprintf(value,kMAXLENGTH,Form("%%%s",decform),(Long64_t)((TTreeFormula*)this)->EvalInstance<LongDouble_t>(instance)); break;
+                     // Evaluate both the "long" ("l") and "long long" ("ll") formats
+                     // through `long double` so that, where it is wide enough (x86),
+                     // the full 64-bit value prints exactly; this keeps floating-point
+                     // arithmetic semantics unchanged.
+                     case 2: {
+                        LongDouble_t v = ((TTreeFormula *)this)->EvalInstance<LongDouble_t>(instance);
+                        CheckIntegerPrintPrecision(v, GetTitle());
+                        snprintf(value, kMAXLENGTH, Form("%%%s", decform), (Long_t)v);
+                        break;
+                     }
+                     case 3: {
+                        LongDouble_t v = ((TTreeFormula *)this)->EvalInstance<LongDouble_t>(instance);
+                        CheckIntegerPrintPrecision(v, GetTitle());
+                        snprintf(value, kMAXLENGTH, Form("%%%s", decform), (Long64_t)v);
+                        break;
+                     }
                      case 1:
                      default: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(Int_t)((TTreeFormula*)this)->EvalInstance(instance)); break;
                   }
@@ -5086,8 +5130,21 @@ char *TTreeFormula::PrintValue(Int_t mode, Int_t instance, const char *decform)
                {
                   switch (outputSizeLevel) {
                      case 0:  snprintf(value,kMAXLENGTH,Form("%%%s",decform),(UShort_t)((TTreeFormula*)this)->EvalInstance(instance)); break;
-                     case 2:  snprintf(value,kMAXLENGTH,Form("%%%s",decform),(ULong_t)((TTreeFormula*)this)->EvalInstance<LongDouble_t>(instance)); break;
-                     case 3:  snprintf(value,kMAXLENGTH,Form("%%%s",decform),(ULong64_t)((TTreeFormula*)this)->EvalInstance<LongDouble_t>(instance)); break;
+                     // See the signed 'd'/'i' case above: both "l" and "ll" evaluate
+                     // through `long double` to print the full value exactly where it
+                     // is wide enough, without changing arithmetic semantics.
+                     case 2: {
+                        LongDouble_t v = ((TTreeFormula *)this)->EvalInstance<LongDouble_t>(instance);
+                        CheckIntegerPrintPrecision(v, GetTitle());
+                        snprintf(value, kMAXLENGTH, Form("%%%s", decform), (ULong_t)v);
+                        break;
+                     }
+                     case 3: {
+                        LongDouble_t v = ((TTreeFormula *)this)->EvalInstance<LongDouble_t>(instance);
+                        CheckIntegerPrintPrecision(v, GetTitle());
+                        snprintf(value, kMAXLENGTH, Form("%%%s", decform), (ULong64_t)v);
+                        break;
+                     }
                      case 1:
                      default: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(UInt_t)((TTreeFormula*)this)->EvalInstance(instance)); break;
                   }
diff --git a/tree/treeplayer/test/regressions.cxx b/tree/treeplayer/test/regressions.cxx
@@ -596,15 +596,14 @@ TEST(TTreeScan, TTreeGetBranchOfFriendTChain)
 // evaluated and printed as a double, rounding anything above 2^53.
 TEST(TTreeScan, ULong64Precision)
 {
-   // The "long long" Scan/Draw column format is evaluated through `long double`
-   // (see TTreeFormula::PrintValue), so exact 64-bit integer output is only
-   // possible where `long double` has more mantissa bits than `double`. That is
-   // the case on x86-64 (80-bit, 64-bit mantissa) but not, e.g., on macOS ARM
-   // where `long double` is just a 64-bit `double` (53-bit mantissa). Skip the
-   // exactness check there, since the value genuinely cannot be represented.
-   if (std::numeric_limits<long double>::digits <= std::numeric_limits<double>::digits)
-      GTEST_SKIP() << "long double is not wider than double here; the 64-bit value "
-                      "is genuinely unrepresentable and exactness cannot be checked";
+   // The "long" ("ld") and "long long" ("lld") Scan column formats are both
+   // evaluated through `long double` (see TTreeFormula::PrintValue), so exact
+   // 64-bit integer output is only possible where `long double` has more mantissa
+   // bits than `double`. That is the case on x86-64 (80-bit, 64-bit mantissa) but
+   // not, e.g., on macOS ARM where `long double` is just a 64-bit `double` (53-bit
+   // mantissa). This test checks the exact output on the former and, on the
+   // latter, that the value's unrepresentability is reported as the
+   // known-limitation error rather than silently rounded.
 
    // 1617047019150033926 needs 61 bits, so it cannot be represented exactly
    // by a double (53-bit mantissa).
@@ -658,10 +657,22 @@ TEST(TTreeScan, ULong64Precision)
    // off-by-one in the length-modifier detection, but both must behave identically.
    // The "ld" ("long") and "lld" ("long long") formats must behave identically too,
    // as both evaluate through `long double`.
-   // long double holds the 61-bit value exactly: every spelling must print the
-   // exact 64-bit value and the exact result of arithmetic with large constants.
-   EXPECT_EQ(scanToString("colsize=21 col=lld:lld:lld"), expectedScanOut);
-   EXPECT_EQ(scanToString("col=21lld:21lld:21lld"), expectedScanOut);
-   EXPECT_EQ(scanToString("colsize=21 col=ld:ld:ld"), expectedScanOut);
-   EXPECT_EQ(scanToString("col=21ld:21ld:21ld"), expectedScanOut);
+   if (std::numeric_limits<long double>::digits > std::numeric_limits<double>::digits) {
+      // long double holds the 61-bit value exactly: every spelling must print the
+      // exact 64-bit value and the exact result of arithmetic with large constants.
+      EXPECT_EQ(scanToString("colsize=21 col=lld:lld:lld"), expectedScanOut);
+      EXPECT_EQ(scanToString("col=21lld:21lld:21lld"), expectedScanOut);
+      EXPECT_EQ(scanToString("colsize=21 col=ld:ld:ld"), expectedScanOut);
+      EXPECT_EQ(scanToString("col=21ld:21ld:21ld"), expectedScanOut);
+   } else {
+      // long double is just a 64-bit double here, so the value is genuinely
+      // unrepresentable: PrintValue must emit the known-limitation error (once per
+      // offending value) instead of silently rounding.
+      ROOT::TestSupport::CheckDiagsRAII diags;
+      diags.requiredDiag(kError, "TTreeFormula::PrintValue", "may be inexact", /*matchFullMessage=*/false);
+      scanToString("colsize=21 col=lld:lld:lld");
+      scanToString("col=21lld:21lld:21lld");
+      scanToString("colsize=21 col=ld:ld:ld");
+      scanToString("col=21ld:21ld:21ld");
+   }
 }