Skip to content

Commit c884b3b

Browse files
committed
Phase 13.58.ADF: nested (recursive) subframe-column lazy draw
Extends single-level subframe lazy draw (8e1221b) to nested refs "A.B.col". Same logic, applied recursively -- no new draw logic; the eager merge already walks the chain. - _load_lazy_subframe: when a lazy subframe materializes, recover its own child subframes (read_adf_metadata on the subframe's tree) and register them as lazy on its frame, so the next chain segment is resolvable. - _lazy_ensure_subframe_refs / _lazy_materialize_subframe_chain: walk the full dotted chain (A, then B inside A's frame, ...), loading each level's index columns and materializing each subframe; stop at the first non-subframe segment (unresolved -> draw fails loud). - Fix latent typo self._df -> self.df in the subframe index-column validation warning; never exercised before because lazy subframes only existed on the lazy-main frame, which takes the _lazy_reader branch. Surfaced now that a lazy subframe materializes on a non-lazy (already-materialized) parent frame. Test: test_phase1358_lazy_draw_invariance.py::test_lazy_nested_subframe_column_draw -- two-level main->A->B drawn via "A.B.col:x", lazy == eager, exact-load {run, x} (decoys at every level excluded). Single-level test and calibITS suite unchanged and passing.
1 parent 8e1221b commit c884b3b

2 files changed

Lines changed: 108 additions & 27 deletions

File tree

UTILS/dfextensions/AliasDataFrame/AliasDataFrame.py

Lines changed: 66 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2097,36 +2097,48 @@ def ensure_subframe(self, name: str) -> None:
20972097
self._load_lazy_subframe(name)
20982098

20992099
def _lazy_ensure_subframe_refs(self, text):
    """Materialize the *lazy* subframe chain(s) referenced in ``text``.

    Dotted references of the form "<sf>.<col>" or "<sf>.<sub>...<col>" are extracted
    from the text (expr/selection/group_by/...). For each reference, every segment
    except the last (the column) is treated as a candidate subframe name and the
    resulting chain is handed to ``_lazy_materialize_subframe_chain``, which walks
    it level by level and stops at the first segment that is not a subframe.

    This only prepares the frames; it does not evaluate the expression. A reference
    that cannot be resolved is simply left alone here.

    Args:
        text: Text to scan. ``None`` or empty text is a no-op. A list, tuple or set
            of strings is scanned as if its elements were joined by spaces.

    Returns:
        None.
    """
    # Nothing lazy on this frame: neither a lazy main reader nor lazy subframe readers.
    # Both attributes are read defensively, matching _lazy_materialize_subframe_chain.
    if getattr(self, '_lazy_reader', None) is None and not getattr(self, '_subframe_readers', None):
        return
    if not text:
        return
    if isinstance(text, (list, tuple, set, frozenset)):
        # re.findall() needs a single string; callers may pass a list of column names.
        text = ' '.join(str(part) for part in text)
    import re as _re
    chains = set()
    for tok in _re.findall(r'\b(\w+(?:\.\w+)+)\b', text):
        segs = tok.split('.')
        # The token regex also matches numeric literals such as "1.5"; those are never
        # subframe references, so do not walk them.
        if all(seg[0].isdigit() for seg in segs):
            continue
        # all leading segments except the final one (the column) are candidate subframes
        chains.add(tuple(segs[:-1]))
    # Each distinct chain is walked once ("A.x" and "A.y" share the chain ("A",)), in a
    # deterministic order so that load order does not depend on string hashing.
    for chain in sorted(chains):
        self._lazy_materialize_subframe_chain(list(chain))
2118+
2119+
def _lazy_materialize_subframe_chain(self, names):
    """Materialize a chain of lazy subframes level by level.

    For ``names == ['A', 'B']`` this materializes ``A`` on this frame, then descends
    into the frame stored in A's registry entry and materializes ``B`` there. The walk
    stops at the first segment that has no entry in the current frame's subframe
    registry (a leaf column or an unresolved name).

    Args:
        names: Ordered subframe names, outermost first. An empty sequence is a no-op.

    Returns:
        None.
    """
    current = self
    for nm in names:
        # Every per-frame bookkeeping attribute is read defensively: a frame reached
        # through the chain may not carry all of them.
        readers = getattr(current, '_subframe_readers', None) or {}
        loaded = getattr(current, '_subframe_loaded', None) or {}
        if nm in readers and not loaded.get(nm, False):
            cfg = (getattr(current, '_subframe_lazy_config', None) or {}).get(nm)
            reader = getattr(current, '_lazy_reader', None)
            # Load this level's index columns into the current frame (join keys). Only
            # done when the current frame has a lazy reader.
            if cfg and reader is not None:
                # Coerce to sets so the set algebra below also works when the reader
                # exposes its branch collections as lists.
                available = set(getattr(reader, 'available_branches', None) or ())
                already_loaded = set(getattr(reader, 'loaded_branches', None) or ())
                idx_to_load = (set(cfg.get('index_columns') or []) - already_loaded) & available
                if idx_to_load:
                    # sorted(): deterministic request order regardless of set hashing
                    current.ensure_branches(sorted(idx_to_load))
            current.ensure_subframe(nm)
        registry = getattr(current, '_subframes', None)
        entry = registry.get_entry(nm) if registry is not None else None
        if not entry:
            break  # not a subframe (leaf column) or unresolved -> stop the walk here
        current = entry['frame']
21302142

21312143
def _load_lazy_subframe(self, name: str) -> None:
21322144
"""
@@ -2166,6 +2178,33 @@ def _load_lazy_subframe(self, name: str) -> None:
21662178

21672179
# Mark as loaded
21682180
self._subframe_loaded[name] = True
2181+
2182+
# Phase 13.58 (nested): recover and register this subframe's OWN child subframes as
2183+
# lazy on its frame, so a nested ref ("A.B.col") can walk one level deeper. The child
2184+
# data lives in sibling trees "<this_tree>__subframe__<child>"; index columns come
2185+
# from the child's recovered metadata. Children without usable index columns are
2186+
# skipped (the draw then fails loud, never silently wrong) -- same contract as the
2187+
# top level.
2188+
sf_file = config.get('file')
2189+
sf_tree = config.get('tree')
2190+
if sf_file and sf_tree:
2191+
try:
2192+
from adf_metadata_compat import read_adf_metadata
2193+
child_meta = read_adf_metadata(sf_file, sf_tree)
2194+
child_idx = child_meta.get('subframe_indices') or {}
2195+
for child in (child_meta.get('subframes') or []):
2196+
cidx = child_idx.get(child)
2197+
if not cidx:
2198+
continue
2199+
if subframe_adf._subframes.has_subframe(child) or child in subframe_adf._subframe_readers:
2200+
continue
2201+
subframe_adf.register_subframe_lazy(
2202+
child, sf_file,
2203+
tree_name=f"{sf_tree}__subframe__{child}",
2204+
index_columns=cidx,
2205+
)
2206+
except Exception as e:
2207+
warnings.warn(f"_load_lazy_subframe: nested-subframe recovery for '{name}' failed: {e}")
21692208

21702209
# Validate index columns exist in main DataFrame
21712210
# CRITICAL: Check lazy reader FIRST to avoid triggering main load
@@ -2178,9 +2217,9 @@ def _load_lazy_subframe(self, name: str) -> None:
21782217
f"Subframe '{name}' index column(s) {sorted(missing_in_main)} "
21792218
f"not found in main DataFrame available branches."
21802219
)
2181-
elif len(self._df) > 0:
2220+
elif len(self.df) > 0:
21822221
# Eager main: safe to check columns directly
2183-
missing_in_main = set(config['index_columns']) - set(self._df.columns)
2222+
missing_in_main = set(config['index_columns']) - set(self.df.columns)
21842223
if missing_in_main:
21852224
warnings.warn(
21862225
f"Subframe '{name}' index column(s) {sorted(missing_in_main)} "

UTILS/dfextensions/AliasDataFrame/tests/test_phase1358_lazy_draw_invariance.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,5 +322,47 @@ def _build():
322322
assert "A" in lazy.list_subframes() # materialized on demand by the draw
323323

324324

325+
@pytest.mark.invariance
def test_lazy_nested_subframe_column_draw(tmp_path):
    """Phase 13.58 (nested): recursive subframe-column lazy draw.

    A two-level chain main -> A -> B is drawn via 'A.B.col:x'. The lazy frame must
    materialize the whole chain on demand (A, then B registered on A's frame at
    materialization), match the eager result exactly, and load only the join key and
    the main branch (no decoy column at any level).
    """
    import warnings as _warnings

    rng = np.random.default_rng(3)
    n = 300
    run = np.arange(n)
    x = rng.random(n)
    col = rng.random(n)

    def _build():
        # run / x / col are copies of the shared arrays above; the decoy columns
        # (bdecoy, a, unused) draw fresh values from rng on every call.
        B = AliasDataFrame(pd.DataFrame({"run": run.copy(), "col": col.copy(),
                                         "bdecoy": rng.random(n)}))
        A = AliasDataFrame(pd.DataFrame({"run": run.copy(), "a": rng.random(n)}))
        A.register_subframe("B", B, index_columns=["run"])
        m = AliasDataFrame(pd.DataFrame({"run": run.copy(), "x": x.copy(),
                                         "unused": rng.random(n)}))
        m.register_subframe("A", A, index_columns=["run"])
        return m

    p = str(tmp_path / "nested.root")
    src = _build()
    with _warnings.catch_warnings():
        _warnings.simplefilter("ignore")
        with uproot.recreate(p) as f:
            src._write_all_data_to_uproot(f, "t", True)
        # NOTE(review): metadata is addressed by path, so it is written after the
        # uproot handle is closed -- confirm against the single-level test above.
        src._write_all_metadata_to_key(p, "t")

    lazy = AliasDataFrame.read_tree_lazy(p, "t")
    lazy.draw_lazy = True
    eager = _build()
    eager.draw_lazy = True
    assert lazy._lazy_reader.loaded_branches == set()

    out_l = lazy.draw(expr="A.B.col:x", type="profile", bins=12, return_data=True)
    out_e = eager.draw(expr="A.B.col:x", type="profile", bins=12, return_data=True)
    assert isinstance(out_l, tuple) and len(out_l) > 2
    assert isinstance(out_e, tuple) and len(out_e) > 2
    _cmp_stats(out_l[2], out_e[2], "nested_subframe_draw")  # lazy == eager

    assert lazy._lazy_reader.loaded_branches == {"run", "x"}, \
        f"loaded {sorted(lazy._lazy_reader.loaded_branches)} != {{run, x}}"
    assert "A" in lazy.list_subframes()  # outer materialized

    # Inner level: B must be resolvable on A's materialized frame, not only A on main.
    a_entry = lazy._subframes.get_entry("A")
    assert a_entry, "subframe 'A' has no registry entry after the draw"
    assert "B" in a_entry["frame"].list_subframes()  # nested level materialized
366+
325367
if __name__ == "__main__":
326368
sys.exit(pytest.main([__file__, "-v"]))

0 commit comments

Comments
 (0)