Skip to content

Commit bbedd90

Browse files
committed
Phase 13.27.ADF: read_tree skip_branches for selective branch exclusion
Adds a new parameter skip_branches=[regex, ...] to read_tree(). Each pattern is matched against branch names with re.fullmatch; matched branches are neither read from the file nor present in the resulting DataFrame. It is kept as a separate parameter from dtype_overrides (single responsibility). Tests D11-D14 cover skip exclusion, column count, combination with dtype_overrides, and the no-match no-op case. Two intermittent test failures (save_and_load, backward_compat) were confirmed to be parallel-execution artifacts — both pass in isolation on clean and on modified code.
1 parent 249fd55 commit bbedd90

2 files changed

Lines changed: 97 additions & 2 deletions

File tree

UTILS/dfextensions/AliasDataFrame/AliasDataFrame.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5556,7 +5556,8 @@ def _write_metadata_to_tree(self, open_tfile, treename):
55565556

55575557
@staticmethod
55585558
def read_tree(filename, treename="tree", entry_start=None, entry_stop=None,
5559-
num_workers=8, load_subframes=True, dtype_overrides=None):
5559+
num_workers=8, load_subframes=True, dtype_overrides=None,
5560+
skip_branches=None):
55605561
"""
55615562
Read AliasDataFrame from ROOT TTree with optimized memory and speed.
55625563

@@ -5598,6 +5599,21 @@ def read_tree(filename, treename="tree", entry_start=None, entry_stop=None,
55985599

55995600
Safety: warns on overflow (finite value → inf after downcast).
56005601
NaN values are preserved across all float conversions.
5602+
skip_branches : list of str, optional
5603+
Regex patterns for branches to exclude from reading. Patterns are
5604+
matched against branch names using ``re.fullmatch``. Matched
5605+
branches are not read and do not appear in the DataFrame.
5606+
5607+
Example::
5608+
5609+
skip_branches=[
5610+
r'quality_flag.*', # skip 3.48GB object column
5611+
r'.*_debug_.*', # skip debug branches
5612+
]
5613+
5614+
Warning: skipping index columns used by subframe joins will cause
5615+
join failures. Skipping columns referenced by aliases will cause
5616+
those aliases to show as BROKEN in ``describe_aliases()``.
56015617

56025618
Returns
56035619
-------
@@ -5795,6 +5811,20 @@ def read_tree(filename, treename="tree", entry_start=None, entry_stop=None,
57955811
dtype_hints[branch_name] = target_dtype
57965812
break # first match wins
57975813

5814+
# Apply skip_branches: remove matched branches before reading
5815+
if skip_branches:
5816+
compiled_skips = []
5817+
for pattern in skip_branches:
5818+
try:
5819+
compiled_skips.append(re.compile(pattern))
5820+
except re.error as e:
5821+
warnings.warn(f"Invalid skip_branches pattern {pattern!r}: {e}")
5822+
if compiled_skips:
5823+
branch_names = [
5824+
b for b in branch_names
5825+
if not any(rx.fullmatch(b) for rx in compiled_skips)
5826+
]
5827+
57985828
if not branch_names:
57995829
df = pd.DataFrame()
58005830

UTILS/dfextensions/AliasDataFrame/tests/test_D1_dtype_overrides.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""
2-
Phase 13.26.ADF — dtype_overrides on read_tree
2+
Phase 13.26.ADF — dtype_overrides on read_tree (D1-D10)
3+
Phase 13.27.ADF — skip_branches on read_tree (D11-D14)
34
45
D1: regex pattern matching converts float64→float16
56
D2: first-match-wins precedence
@@ -8,6 +9,13 @@
89
D5: NaN preserved across float downcast
910
D6: schema dtype_hints not overridden when no override matches
1011
D7: round-trip: write float64, read with override, values within tolerance
12+
D8: schema round-trip preserves overridden dtype
13+
D9: entry_range with overrides consistent
14+
D10: overflow warning shows correct original→target dtype
15+
D11: skip_branches excludes branch from DataFrame
16+
D12: skip reduces column count; remaining data correct
17+
D13: skip + dtype_override work together independently
18+
D14: skip with no match is a no-op
1119
"""
1220

1321
import os
@@ -228,3 +236,60 @@ def test_D10_override_warning_shows_correct_dtypes(self, tmp_root_file):
228236

229237
if __name__ == '__main__':
230238
pytest.main([__file__, '-v', '-s'])
239+
240+
241+
# ===========================================================================
242+
# Phase 13.27.ADF — skip_branches tests
243+
# ===========================================================================
244+
245+
@pytest.mark.skipif(not _HAS_ROOT, reason="Requires ROOT + uproot")
246+
class TestSkipBranches:
247+
248+
@pytest.mark.invariance
249+
def test_D11_skip_branch_not_in_dataframe(self, tmp_root_file):
250+
"""skip_branches pattern excludes branch from DataFrame entirely."""
251+
adf = AliasDataFrame.read_tree(tmp_root_file, "tree", skip_branches=[
252+
r'dy_err_PIter1',
253+
])
254+
assert 'dy_err_PIter1' not in adf.df.columns, \
255+
"D11: skipped branch should not appear in DataFrame"
256+
# Other branches still present
257+
assert 'dy_intercept_PIter1' in adf.df.columns
258+
assert 'x' in adf.df.columns
259+
260+
@pytest.mark.invariance
261+
def test_D12_skip_reduces_column_count(self, tmp_root_file):
262+
"""Skipping branches reduces column count; remaining data correct."""
263+
adf_full = AliasDataFrame.read_tree(tmp_root_file, "tree")
264+
adf_skip = AliasDataFrame.read_tree(tmp_root_file, "tree", skip_branches=[
265+
r'dy_err_PIter1',
266+
r'dz_intercept_PIter2',
267+
])
268+
assert len(adf_skip.df.columns) == len(adf_full.df.columns) - 2, \
269+
f"D12: expected {len(adf_full.df.columns) - 2} columns, got {len(adf_skip.df.columns)}"
270+
# Remaining columns have identical values
271+
for col in adf_skip.df.columns:
272+
np.testing.assert_array_equal(
273+
adf_skip.df[col].values, adf_full.df[col].values,
274+
err_msg=f"D12: column {col} values differ after skip")
275+
276+
@pytest.mark.invariance
277+
def test_D13_skip_and_dtype_override_combined(self, tmp_root_file):
278+
"""skip_branches and dtype_overrides work together independently."""
279+
adf = AliasDataFrame.read_tree(tmp_root_file, "tree",
280+
dtype_overrides={r'dy_intercept_PIter1': np.float16},
281+
skip_branches=[r'dy_err_PIter1'],
282+
)
283+
assert 'dy_err_PIter1' not in adf.df.columns
284+
assert adf.df['dy_intercept_PIter1'].dtype == np.float16
285+
# Non-skipped, non-overridden column unchanged
286+
assert 'x' in adf.df.columns
287+
288+
@pytest.mark.invariance
289+
def test_D14_skip_no_match_is_noop(self, tmp_root_file):
290+
"""skip_branches with no matching pattern leaves all columns intact."""
291+
adf_full = AliasDataFrame.read_tree(tmp_root_file, "tree")
292+
adf_skip = AliasDataFrame.read_tree(tmp_root_file, "tree", skip_branches=[
293+
r'nonexistent_column_.*',
294+
])
295+
assert list(adf_skip.df.columns) == list(adf_full.df.columns)

0 commit comments

Comments
 (0)