Skip to content

Commit 27cbd7b

Browse files
committed
[treeplayer] Report integer Scan values that cannot be printed exactly
Even evaluated through long double, an integer column can still exceed the accumulator's exactly-representable range: above 2^53 where long double is merely a 64-bit double (e.g. macOS ARM), or beyond 2^64 for results that overflow. As explained in the preceding commit, this cannot be fixed without changing TTreeFormula's frozen floating-point arithmetic, so it is a genuine known limitation, not a bug to work around -- but it must not pass silently. TTreeFormula::PrintValue now emits an error whenever the value it is about to print as an integer has reached the point where the long double accumulator can no longer hold every integer exactly, so the printed digits may be rounded. The diagnostic is deliberately a kError, issued for every offending value with no deduplication: silently printing a wrong integer is precisely the trap we want to surface as loudly as possible, even on a many-row Scan. The threshold check is inclusive (>= 2^digits) on purpose: a rounded result can land back exactly on the threshold (e.g. 2^53 + 1 -> 2^53 in a 53-bit type), and an inclusive comparison avoids missing those. Closes #7844. 🤖 Done with the help of [Claude Code](https://claude.com/claude-code) (Claude Opus 4.8)
1 parent e0e9ecb commit 27cbd7b

2 files changed

Lines changed: 87 additions & 19 deletions

File tree

tree/treeplayer/src/TTreeFormula.cxx

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include <cstdio>
5050
#include <cmath>
5151
#include <cstdlib>
52+
#include <limits>
5253
#include <typeinfo>
5354
#include <algorithm>
5455
#include <sstream>
@@ -4033,6 +4034,35 @@ template <> Long64_t fmod_local(Long64_t x, Long64_t y) { return fmod((LongDoubl
40334034
template<typename T> inline void SetMethodParam(TMethodCall *method, T p) { method->SetParam(p); }
40344035
template<> void SetMethodParam(TMethodCall *method, LongDouble_t p) { method->SetParam((Double_t)p); }
40354036

4037+
// TTree::Scan prints each integer column through TTreeFormula, which evaluates it
4038+
// in a `long double` accumulator (for both the "l" and "ll" formats). That holds
4039+
// integers exactly only up to 2^digits (its mantissa size: 64 bits on x86, but
4040+
// just 53 where `long double` is merely a 64-bit `double`, e.g. macOS ARM), so a
4041+
// value beyond that range is printed rounded. This cannot be fixed without
4042+
// changing the (frozen) floating-point arithmetic of TTreeFormula, so it is a
4043+
// known limitation; flag it loudly instead. See
4044+
// https://github.com/root-project/root/issues/7844.
4045+
inline void CheckIntegerPrintPrecision(LongDouble_t evaluated, const char *expression)
4046+
{
4047+
// Integers are represented exactly only while their magnitude stays below
4048+
// 2^digits. At or above that threshold not every integer is representable, so
4049+
// the printed value may be rounded -- and a rounded result can even land back
4050+
// on the threshold (e.g. 2^53 + 1 -> 2^53 in a 53-bit type), so the comparison
4051+
// is inclusive to avoid missing such cases.
4052+
const LongDouble_t threshold = std::ldexp(1.0L, std::numeric_limits<LongDouble_t>::digits); // 2^digits
4053+
// Deliberately a kError (not kWarning) emitted for every offending value with
4054+
// no deduplication: silently printing a wrong integer is the trap we want to
4055+
// make impossible to miss, so the diagnostic is intentionally as loud as
4056+
// possible even on a many-row Scan.
4057+
if (evaluated >= threshold || evaluated <= -threshold)
4058+
::Error("TTreeFormula::PrintValue",
4059+
"the integer value of \"%s\" may be inexact: its magnitude reaches 2^%d, "
4060+
"the point beyond which the long double used to evaluate it can no longer "
4061+
"represent every integer exactly, so the printed value may be rounded. "
4062+
"This is a known limitation "
4063+
"(https://github.com/root-project/root/issues/7844).",
4064+
expression, std::numeric_limits<LongDouble_t>::digits);
4065+
}
40364066
}
40374067

40384068
template<typename T> inline T TTreeFormula::GetConstant(Int_t k) { return fConst[k]; }
@@ -5072,8 +5102,22 @@ char *TTreeFormula::PrintValue(Int_t mode, Int_t instance, const char *decform)
50725102
{
50735103
switch (outputSizeLevel) {
50745104
case 0: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(Short_t)((TTreeFormula*)this)->EvalInstance(instance)); break;
5075-
case 2: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(Long_t)((TTreeFormula*)this)->EvalInstance<LongDouble_t>(instance)); break;
5076-
case 3: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(Long64_t)((TTreeFormula*)this)->EvalInstance<LongDouble_t>(instance)); break;
5105+
// Evaluate both the "long" ("l") and "long long" ("ll") formats
5106+
// through `long double` so that, where it is wide enough (x86),
5107+
// the full 64-bit value prints exactly; this keeps floating-point
5108+
// arithmetic semantics unchanged.
5109+
case 2: {
5110+
LongDouble_t v = ((TTreeFormula *)this)->EvalInstance<LongDouble_t>(instance);
5111+
CheckIntegerPrintPrecision(v, GetTitle());
5112+
snprintf(value, kMAXLENGTH, Form("%%%s", decform), (Long_t)v);
5113+
break;
5114+
}
5115+
case 3: {
5116+
LongDouble_t v = ((TTreeFormula *)this)->EvalInstance<LongDouble_t>(instance);
5117+
CheckIntegerPrintPrecision(v, GetTitle());
5118+
snprintf(value, kMAXLENGTH, Form("%%%s", decform), (Long64_t)v);
5119+
break;
5120+
}
50775121
case 1:
50785122
default: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(Int_t)((TTreeFormula*)this)->EvalInstance(instance)); break;
50795123
}
@@ -5086,8 +5130,21 @@ char *TTreeFormula::PrintValue(Int_t mode, Int_t instance, const char *decform)
50865130
{
50875131
switch (outputSizeLevel) {
50885132
case 0: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(UShort_t)((TTreeFormula*)this)->EvalInstance(instance)); break;
5089-
case 2: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(ULong_t)((TTreeFormula*)this)->EvalInstance<LongDouble_t>(instance)); break;
5090-
case 3: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(ULong64_t)((TTreeFormula*)this)->EvalInstance<LongDouble_t>(instance)); break;
5133+
// See the signed 'd'/'i' case above: both "l" and "ll" evaluate
5134+
// through `long double` to print the full value exactly where it
5135+
// is wide enough, without changing arithmetic semantics.
5136+
case 2: {
5137+
LongDouble_t v = ((TTreeFormula *)this)->EvalInstance<LongDouble_t>(instance);
5138+
CheckIntegerPrintPrecision(v, GetTitle());
5139+
snprintf(value, kMAXLENGTH, Form("%%%s", decform), (ULong_t)v);
5140+
break;
5141+
}
5142+
case 3: {
5143+
LongDouble_t v = ((TTreeFormula *)this)->EvalInstance<LongDouble_t>(instance);
5144+
CheckIntegerPrintPrecision(v, GetTitle());
5145+
snprintf(value, kMAXLENGTH, Form("%%%s", decform), (ULong64_t)v);
5146+
break;
5147+
}
50915148
case 1:
50925149
default: snprintf(value,kMAXLENGTH,Form("%%%s",decform),(UInt_t)((TTreeFormula*)this)->EvalInstance(instance)); break;
50935150
}

tree/treeplayer/test/regressions.cxx

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -596,15 +596,14 @@ TEST(TTreeScan, TTreeGetBranchOfFriendTChain)
596596
// evaluated and printed as a double, rounding anything above 2^53.
597597
TEST(TTreeScan, ULong64Precision)
598598
{
599-
// The "long long" Scan/Draw column format is evaluated through `long double`
600-
// (see TTreeFormula::PrintValue), so exact 64-bit integer output is only
601-
// possible where `long double` has more mantissa bits than `double`. That is
602-
// the case on x86-64 (80-bit, 64-bit mantissa) but not, e.g., on macOS ARM
603-
// where `long double` is just a 64-bit `double` (53-bit mantissa). Skip the
604-
// exactness check there, since the value genuinely cannot be represented.
605-
if (std::numeric_limits<long double>::digits <= std::numeric_limits<double>::digits)
606-
GTEST_SKIP() << "long double is not wider than double here; the 64-bit value "
607-
"is genuinely unrepresentable and exactness cannot be checked";
599+
// The "long" ("ld") and "long long" ("lld") Scan column formats are both
600+
// evaluated through `long double` (see TTreeFormula::PrintValue), so exact
601+
// 64-bit integer output is only possible where `long double` has more mantissa
602+
// bits than `double`. That is the case on x86-64 (80-bit, 64-bit mantissa) but
603+
// not, e.g., on macOS ARM where `long double` is just a 64-bit `double` (53-bit
604+
// mantissa). This test checks the exact output on the former and, on the
605+
// latter, that the value's unrepresentability is reported as the
606+
// known-limitation error rather than silently rounded.
608607

609608
// 1617047019150033926 needs 61 bits, so it cannot be represented exactly
610609
// by a double (53-bit mantissa).
@@ -658,10 +657,22 @@ TEST(TTreeScan, ULong64Precision)
658657
// off-by-one in the length-modifier detection, but both must behave identically.
659658
// The "ld" ("long") and "lld" ("long long") formats must behave identically too,
660659
// as both evaluate through `long double`.
661-
// long double holds the 61-bit value exactly: every spelling must print the
662-
// exact 64-bit value and the exact result of arithmetic with large constants.
663-
EXPECT_EQ(scanToString("colsize=21 col=lld:lld:lld"), expectedScanOut);
664-
EXPECT_EQ(scanToString("col=21lld:21lld:21lld"), expectedScanOut);
665-
EXPECT_EQ(scanToString("colsize=21 col=ld:ld:ld"), expectedScanOut);
666-
EXPECT_EQ(scanToString("col=21ld:21ld:21ld"), expectedScanOut);
660+
if (std::numeric_limits<long double>::digits > std::numeric_limits<double>::digits) {
661+
// long double holds the 61-bit value exactly: every spelling must print the
662+
// exact 64-bit value and the exact result of arithmetic with large constants.
663+
EXPECT_EQ(scanToString("colsize=21 col=lld:lld:lld"), expectedScanOut);
664+
EXPECT_EQ(scanToString("col=21lld:21lld:21lld"), expectedScanOut);
665+
EXPECT_EQ(scanToString("colsize=21 col=ld:ld:ld"), expectedScanOut);
666+
EXPECT_EQ(scanToString("col=21ld:21ld:21ld"), expectedScanOut);
667+
} else {
668+
// long double is just a 64-bit double here, so the value is genuinely
669+
// unrepresentable: PrintValue must emit the known-limitation error (once per
670+
// offending value) instead of silently rounding.
671+
ROOT::TestSupport::CheckDiagsRAII diags;
672+
diags.requiredDiag(kError, "TTreeFormula::PrintValue", "may be inexact", /*matchFullMessage=*/false);
673+
scanToString("colsize=21 col=lld:lld:lld");
674+
scanToString("col=21lld:21lld:21lld");
675+
scanToString("colsize=21 col=ld:ld:ld");
676+
scanToString("col=21ld:21ld:21ld");
677+
}
667678
}

0 commit comments

Comments
 (0)