Skip to content

Commit d377a7b

Browse files
committed
BUG FIX: group_by expression not materialized in ADF.draw()
P0: adf.draw(group_by='row%3') crashes after dfdraw Phase 13.30 validates that group_by must be a real column. ADF materializes plot expressions but not group_by expressions. Fix: 5 lines before DFDraw construction — if group_by is a string that is neither a column nor an alias, materialize it as a per-call temp column via df_subset.eval(). No persistent alias pollution. Tests G1-G4: expression materializes, existing column unchanged, alias works (pre-materialized), no alias pollution. Upstream: dfdraw PHASE_13_30_DF_ColumnRefValidation_v1_0_END. BUG_AliasDataFrame_20260512_groupby_expression_materialization.
1 parent 9c904b3 commit d377a7b

2 files changed

Lines changed: 118 additions & 0 deletions

File tree

UTILS/dfextensions/AliasDataFrame/AliasDataFrame.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10987,6 +10987,18 @@ def draw(self,
1098710987
if 'group_by' in kwargs and isinstance(kwargs.get('group_by'), str):
1098810988
kwargs['group_by'] = kwargs['group_by'].replace(dot_ref, flat_ref)
1098910989

10990+
# ── group_by expression materialization (BUG_ADF_GroupByExpressionMaterialization) ──
10991+
# dfdraw requires group_by to be a real column (Phase 13.30 contract).
10992+
# If group_by is a computed expression (e.g., "row%3"), materialize it
10993+
# as a per-call temp column on df_subset. No persistent alias created.
10994+
group_by = kwargs.get('group_by')
10995+
if (group_by is not None
10996+
and isinstance(group_by, str)
10997+
and group_by not in df_subset.columns
10998+
and group_by not in self.aliases):
10999+
df_subset = df_subset.copy()
11000+
df_subset[group_by] = df_subset.eval(group_by)
11001+
1099011002
# Create plotter and delegate
1099111003
plotter = DFDraw(df_subset)
1099211004

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
"""
2+
BUG_ADF_GroupBy_Expression_Materialization — regression tests.
3+
4+
dfdraw Phase 13.30 validates that group_by must be a real column.
5+
ADF.draw() must materialize group_by expressions (e.g., "row%3")
6+
as per-call temp columns before forwarding to dfdraw.
7+
8+
G1: arithmetic expression materializes through ADF
9+
G2: existing column unchanged
10+
G3: alias works (with explicit materialization)
11+
G4: no alias pollution after expression group_by
12+
"""
13+
14+
import os
15+
import sys
16+
import pytest
17+
import numpy as np
18+
import pandas as pd
19+
20+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
21+
from AliasDataFrame import AliasDataFrame
22+
23+
try:
24+
import matplotlib
25+
matplotlib.use('Agg')
26+
from dfdraw import DFDraw
27+
_HAS_DFDRAW = True
28+
except ImportError:
29+
_HAS_DFDRAW = False
30+
31+
32+
@pytest.fixture
33+
def adf_for_groupby():
34+
"""ADF with integer row column suitable for modular arithmetic."""
35+
rng = np.random.default_rng(42)
36+
n = 90
37+
df = pd.DataFrame({
38+
'x': np.linspace(0, 1, n, dtype=np.float32),
39+
'y': rng.standard_normal(n).astype(np.float32),
40+
'row': np.arange(n, dtype=np.int16),
41+
'sector': np.tile(np.arange(3, dtype=np.int8), n // 3),
42+
})
43+
return AliasDataFrame(df)
44+
45+
46+
@pytest.mark.skipif(not _HAS_DFDRAW, reason="Requires dfdraw")
47+
class TestGroupByExpressionMaterialization:
48+
49+
@pytest.mark.invariance
50+
def test_G1_arithmetic_expression_materializes(self, adf_for_groupby):
51+
"""
52+
adf.draw(group_by='row%3') must produce a grouped figure,
53+
NOT raise ValueError from dfdraw Phase 13.30 validator.
54+
Regression for BUG_ADF_GroupBy_Expression_Materialization.
55+
"""
56+
adf = adf_for_groupby
57+
fig, ax, stats = adf.draw(
58+
'y:x', type='profile',
59+
group_by='row%3',
60+
bins=10, min_entries=2,
61+
)
62+
# Three distinct row%3 values (0, 1, 2) → at least 3 lines
63+
assert len(ax.get_lines()) >= 3
64+
assert ax.get_legend() is not None
65+
66+
@pytest.mark.invariance
67+
def test_G2_existing_column_unchanged(self, adf_for_groupby):
68+
"""group_by with existing column name works without materialization."""
69+
adf = adf_for_groupby
70+
fig, ax, stats = adf.draw(
71+
'y:x', type='profile',
72+
group_by='sector',
73+
bins=10, min_entries=2,
74+
)
75+
assert len(ax.get_lines()) >= 3
76+
77+
@pytest.mark.invariance
78+
def test_G3_alias_works(self, adf_for_groupby):
79+
"""group_by referencing an alias works when pre-materialized."""
80+
adf = adf_for_groupby
81+
adf.add_alias('row_mod3', 'row%3', dtype=np.int8)
82+
adf.materialize_aliases(names=['row_mod3'])
83+
fig, ax, stats = adf.draw(
84+
'y:x', type='profile',
85+
group_by='row_mod3',
86+
bins=10, min_entries=2,
87+
)
88+
assert len(ax.get_lines()) >= 3
89+
90+
@pytest.mark.invariance
91+
def test_G4_no_alias_pollution(self, adf_for_groupby):
92+
"""
93+
Per-call materialization MUST NOT leak as a persistent alias.
94+
After adf.draw(group_by='row%3'), 'row%3' should not be in
95+
adf.aliases — the temp column lived only for that call.
96+
"""
97+
adf = adf_for_groupby
98+
assert 'row%3' not in adf.aliases
99+
adf.draw('y:x', type='profile', group_by='row%3',
100+
bins=5, min_entries=2)
101+
assert 'row%3' not in adf.aliases, \
102+
"Expression group_by leaked as persistent alias"
103+
104+
105+
if __name__ == '__main__':
106+
pytest.main([__file__, '-v', '-s'])

0 commit comments

Comments
 (0)