Skip to content

Commit f0ee903

Browse files
authored
Fix RDF parsing of class members confused for existing columns
1 parent 0be6ecb commit f0ee903

2 files changed

Lines changed: 62 additions & 2 deletions

File tree

tree/dataframe/src/RDFInterfaceUtils.cxx

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,12 @@ std::pair<ColumnNames_t, ColumnNames_t> FindUsedColsAndAliases(const std::string
106106
// token is not a potential variable name, skip it
107107
continue;
108108
}
109+
// Skip symbols that are member accesses (obj.method) — they are not column references.
110+
// lexertk does not produce dot-prefixed tokens, so the token immediately before a method
111+
// name is always the literal "." token when it is a member access.
112+
if (i > 0 && tokens[i - 1].value == ".") {
113+
continue;
114+
}
109115

110116
ColumnNames_t potentialColNames({tok.value});
111117

@@ -159,7 +165,7 @@ TString ResolveAliases(const TString &expr, const ColumnNames_t &usedAliases,
159165

160166
for (const auto &alias : usedAliases) {
161167
const auto &col = colRegister.ResolveAlias(alias);
162-
TPRegexp replacer("\\b" + EscapeDots(alias) + "\\b");
168+
TPRegexp replacer("(?<!\\.)\\b" + EscapeDots(alias) + "\\b");
163169
replacer.Substitute(out, col.data(), "g");
164170
}
165171

@@ -200,7 +206,7 @@ ParsedExpression ParseRDFExpression(std::string_view expr, const ROOT::Internal:
200206
[](const std::string &a, const std::string &b) { return a.size() > b.size(); });
201207
for (const auto &col : usedCols) {
202208
const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
203-
TPRegexp replacer("\\b" + EscapeDots(col) + "\\b");
209+
TPRegexp replacer("(?<!\\.)\\b" + EscapeDots(col) + "\\b");
204210
replacer.Substitute(exprWithVars, varNames[varIdx], "g");
205211
}
206212

tree/dataframe/test/dataframe_colnames.cxx

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "ROOT/RDataFrame.hxx"
2+
#include <TInterpreter.h>
23
#include <TTree.h>
34

45
#include "gtest/gtest.h"
@@ -89,3 +90,56 @@ TEST(Aliases, FilterOnAliasJit)
8990
auto c = tdf.Define("c0", [&i]() { return i++; }).Alias("c1", "c0").Filter("c1>1").Count();
9091
EXPECT_EQ(1U, *c);
9192
}
93+
94+
// Regression test for https://github.com/root-project/root/issues/22295
95+
// A column named "phi" must not be confused with a .phi() method call in a JIT expression.
96+
TEST(ColNames, MethodCallNotConfusedWithColumn)
97+
{
98+
// Declare minimal helper types for use in JIT strings.
99+
// Using gInterpreter::Declare avoids any dependency on GenVector or other libraries.
100+
gInterpreter->Declare("struct RDF22295Vec { double phi() const { return 42.0; } };");
101+
gInterpreter->Declare("struct RDF22295Mem { double phi = 1.5; };");
102+
103+
// Case 1: column "phi" exists, expression calls .phi() method — must not throw,
104+
// and the result must be the value returned by the method (42.0), not a substitution
105+
// artifact. Before the fix this crashed with "no member named 'var0'" because the
106+
// parser replaced .phi() with .var0().
107+
{
108+
ROOT::RDataFrame df(1);
109+
auto df2 = df.Define("phi", "1.0");
110+
std::vector<double> v;
111+
EXPECT_NO_THROW({
112+
auto df3 = df2.Define("result", "RDF22295Vec{}.phi()");
113+
v = *df3.Take<double>("result");
114+
});
115+
EXPECT_NEAR(42.0, v[0], 1e-9);
116+
}
117+
118+
// Case 2: column "phi" exists, expression uses BOTH .phi() method call AND standalone
119+
// phi column reference — only the standalone reference must be substituted.
120+
{
121+
ROOT::RDataFrame df(1);
122+
auto df2 = df.Define("phi", "0.5");
123+
std::vector<double> v;
124+
EXPECT_NO_THROW({
125+
auto df3 = df2.Define("result", "RDF22295Vec{}.phi() + phi");
126+
v = *df3.Take<double>("result");
127+
});
128+
// .phi() returns 42.0; standalone phi column = 0.5
129+
EXPECT_NEAR(42.5, v[0], 1e-9);
130+
}
131+
132+
// Case 3: data-member access obj.phi (not a function call) must still work after the
133+
// fix — the preceding-dot guard must not break legitimate dot-chain expressions.
134+
{
135+
ROOT::RDataFrame df(1);
136+
auto df2 = df.Define("phi", "0.5");
137+
std::vector<double> v;
138+
EXPECT_NO_THROW({
139+
auto df3 = df2.Define("result", "RDF22295Mem{}.phi + phi");
140+
v = *df3.Take<double>("result");
141+
});
142+
// data member .phi = 1.5; standalone phi column = 0.5
143+
EXPECT_NEAR(2.0, v[0], 1e-9);
144+
}
145+
}

0 commit comments

Comments
 (0)