Skip to content

Commit ad91e25

Browse files
author
miranov25
committed
Phase 13.46.DF FIX1: scatter range= removes out-of-range points.
v1.0 only set the view window (set_xlim/set_ylim), so out-of-range points stayed in the collection. Architect 2026-05-28: scatter range= must DROP the points, consistent with hist/profile range= excluding points from binning. Filter x_data/y_data/df_filtered with a single mask before stats and plotting; all parallel arrays (color/size/marker/error) derive from df_filtered and therefore stay aligned. The non-faceted case keeps an exact tight view; faceted cells filter their own points and the shared axes autoscale to the union of the filtered data (no last-cell-wins). F.71 locks the point-removal invariant. Gate 1022 -> 1023. Predecessor: PHASE_13_46_DF v1.0 @ 1d77702.
1 parent 1d77702 commit ad91e25

5 files changed

Lines changed: 101 additions & 35 deletions

File tree

UTILS/dfextensions/dfdraw/docs/CAPABILITY_MATRIX.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Capability Matrix — dfdraw
22

3-
**Generated:** 2026-05-28 10:51 UTC
3+
**Generated:** 2026-05-28 13:04 UTC
44
**Phase:** 13.15.DF
55
**Generator:** `scripts/generate_capability_matrix.py`
66
**Sources:** `tests/feature_taxonomy.py` + `tests/test_layer_classification.py`
@@ -9,13 +9,13 @@
99

1010
| Status | Count | % |
1111
|--------|------:|--:|
12-
| ✅ Verified | 55 | 49% |
12+
| ✅ Verified | 56 | 49% |
1313
| ☑️ Smoke-only | 57 | 50% |
1414
| 🧨 Broken | 0 | 0% |
1515
| 📋 Planned | 1 | 1% |
16-
| **Total features** | **113** | |
17-
| **Total proof tests** | **577** | |
18-
| **Invariance tests** | **348** | |
16+
| **Total features** | **114** | |
17+
| **Total proof tests** | **578** | |
18+
| **Invariance tests** | **349** | |
1919

2020
**Status key:**
2121
- ✅ Verified — has at least one invariance test (A ≡ B check)
@@ -179,6 +179,7 @@
179179
|| **API.kwarg_typo_guard** — Kwarg-typo guard (difflib did-you-mean at draw() entry) | 1 | 0 |
180180
| | **RANGE** | | |
181181
|| **RANGE.scatter** — range= on scatter via shared 2D resolver | 4 | 0 |
182+
|| **RANGE.scatter_filter** — range= on scatter removes out-of-range points (FIX1 semantic) | 1 | 0 |
182183
| | **TITLE** | | |
183184
|| **TITLE.get_suptitle** — _get_suptitle public-API helper (mpl >= 3.8 + fallback) | 1 | 0 |
184185

UTILS/dfextensions/dfdraw/plots/scatter.py

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,36 @@ def draw_scatter(
196196
pd.to_datetime(_x_arr, unit='s').to_pydatetime()
197197
)
198198

199+
# Phase 13.46.DF FIX1 — range= REMOVES out-of-range points (point filter),
200+
# consistent with how hist/profile range= excludes points from binning. The
201+
# previous v1.0 set_xlim/set_ylim only clipped the VIEW (out-of-range points
202+
# stayed in the collection, off-screen) — architect 2026-05-28: scatter
203+
# range must drop the points. Done here (before stats + plotting) so the
204+
# filtered count is reflected in stats and every parallel array (color,
205+
# size, marker, error bars) — all derived from df_filtered — stays aligned.
206+
_range_xr = _range_yr = None
207+
_range_strategy = None
208+
if range is not None:
209+
from ._autorange import resolve_range_2d
210+
(_range_xr, _range_yr), _range_strategy = resolve_range_2d(
211+
range, x_data, y_data,
212+
style_strategy=get_style_value("autorange.strategy", "hybrid"),
213+
style_k_robust=get_style_value("autorange.k_robust", 4.0),
214+
style_k_outlier=get_style_value("autorange.k_outlier", 1.5),
215+
style_percentile=get_style_value("autorange.percentile", (1.0, 99.0)),
216+
)
217+
_rmask = np.ones(len(x_data), dtype=bool)
218+
if np.all(np.isfinite(_range_xr)) and _range_xr[0] < _range_xr[1]:
219+
_rmask &= (x_data >= _range_xr[0]) & (x_data <= _range_xr[1])
220+
if np.all(np.isfinite(_range_yr)) and _range_yr[0] < _range_yr[1]:
221+
_rmask &= (y_data >= _range_yr[0]) & (y_data <= _range_yr[1])
222+
# Filter x/y and df_filtered by the SAME mask so downstream color/size/
223+
# marker/error helpers (which derive from df_filtered) stay aligned.
224+
x_data = x_data[_rmask]
225+
y_data = y_data[_rmask]
226+
if len(df_filtered) == len(_rmask):
227+
df_filtered = df_filtered[_rmask]
228+
199229
# Statistics
200230
stats_dict = _compute_scatter_stats(x_data, y_data)
201231
# Phase 13.28.DF: Sanitize counters (AD-71)
@@ -207,6 +237,11 @@ def draw_scatter(
207237
if len(x_data) > 0 else ((0.0, 1.0), (0.0, 1.0))
208238
)
209239
stats_dict["autorange_strategy"] = "minmax"
240+
# Honest stats when range= was applied: record the resolved strategy +
241+
# window actually used to filter (overrides the default above).
242+
if _range_strategy is not None:
243+
stats_dict["autorange_used"] = (_range_xr, _range_yr)
244+
stats_dict["autorange_strategy"] = _range_strategy
210245

211246
# Apply jitter
212247
if jitter:
@@ -408,36 +443,15 @@ def draw_scatter(
408443
ax.xaxis.set_major_formatter(mdates.DateFormatter(time_format))
409444
fig.autofmt_xdate()
410445

411-
# Phase 13.46.DF C-9: range= support via the SHARED 2D resolver (AD-74
412-
# per-axis), identical handling to hist/profile/2D — only the application
413-
# differs (set_xlim/set_ylim vs binning, because scatter has no bins).
414-
# Applied AFTER plotting so ax.scatter()'s autoscale cannot override it.
415-
if range is not None:
416-
from ._autorange import resolve_range_2d
417-
(_xr, _yr), _strategy = resolve_range_2d(
418-
range, x_data, y_data,
419-
style_strategy=get_style_value("autorange.strategy", "hybrid"),
420-
style_k_robust=get_style_value("autorange.k_robust", 4.0),
421-
style_k_outlier=get_style_value("autorange.k_outlier", 1.5),
422-
style_percentile=get_style_value("autorange.percentile", (1.0, 99.0)),
423-
)
424-
# Per-cell limit application is suppressed in facet mode: faceted axes
425-
# are SHARED (sharex/sharey), so a per-cell set_xlim would propagate
426-
# and the last cell would clobber all others (silently wrong). In a
427-
# facet grid the shared axes autoscale to the global data extent
428-
# instead. Per-cell strategy tightening under faceting (non-minmax)
429-
# is a Phase 13.46.DF FIX1 item — see CRR §2.
430-
if not _facet_mode:
431-
# Degenerate/empty guard (single point, all-filtered, min==max,
432-
# non-finite): skip set_*lim and let matplotlib autoscale.
433-
if np.all(np.isfinite(_xr)) and _xr[0] < _xr[1]:
434-
ax.set_xlim(_xr)
435-
if np.all(np.isfinite(_yr)) and _yr[0] < _yr[1]:
436-
ax.set_ylim(_yr)
437-
# Honest stats: record the strategy actually resolved (not the
438-
# unconditional "minmax" the default path records above).
439-
stats_dict["autorange_used"] = (_xr, _yr)
440-
stats_dict["autorange_strategy"] = _strategy
446+
# Phase 13.46.DF FIX1 — points are already filtered to the resolved range
447+
# above. Here we only tighten the VIEW to the exact resolved window in the
448+
# non-faceted case (faceted axes are SHARED, so a per-cell set_xlim would
449+
# clobber siblings — those autoscale to the union of filtered data instead).
450+
if _range_strategy is not None and not _facet_mode:
451+
if _range_xr is not None and np.all(np.isfinite(_range_xr)) and _range_xr[0] < _range_xr[1]:
452+
ax.set_xlim(_range_xr)
453+
if _range_yr is not None and np.all(np.isfinite(_range_yr)) and _range_yr[0] < _range_yr[1]:
454+
ax.set_ylim(_range_yr)
441455

442456
return fig, ax, stats_dict
443457

UTILS/dfextensions/dfdraw/tests/feature_taxonomy.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,6 +1501,14 @@
15011501
"test_phase_13_46_df_audit_fixes.py::TestPhase1346AuditFixes::test_f69b_profile_hist_range_minmax_no_unpack_error",
15021502
],
15031503
},
1504+
{
1505+
"id": "RANGE.scatter_filter",
1506+
"name": "range= on scatter removes out-of-range points (FIX1 semantic)",
1507+
"category": "RANGE",
1508+
"tests": [
1509+
"test_phase_13_46_df_audit_fixes.py::TestPhase1346AuditFixes::test_f71_scatter_range_removes_out_of_range_points",
1510+
],
1511+
},
15041512
{
15051513
"id": "TITLE.get_suptitle",
15061514
"name": "_get_suptitle public-API helper (mpl >= 3.8 + fallback)",

UTILS/dfextensions/dfdraw/tests/test_layer_classification.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,7 @@
441441
"test_phase_13_46_df_audit_fixes.py::TestPhase1346AuditFixes::test_f69a_scatter_range_strategy_parity": "invariance",
442442
"test_phase_13_46_df_audit_fixes.py::TestPhase1346AuditFixes::test_f69b_profile_hist_range_minmax_no_unpack_error": "invariance",
443443
"test_phase_13_46_df_audit_fixes.py::TestPhase1346AuditFixes::test_f70_get_suptitle_live_path": "invariance",
444+
"test_phase_13_46_df_audit_fixes.py::TestPhase1346AuditFixes::test_f71_scatter_range_removes_out_of_range_points": "invariance",
444445

445446
# Everything else defaults to "smoke"
446447
}

UTILS/dfextensions/dfdraw/tests/test_phase_13_46_df_audit_fixes.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,45 @@ def test_f70_get_suptitle_live_path(self):
209209
fig2 = plt.figure()
210210
assert _get_suptitle(fig2) == ""
211211
plt.close(fig2)
212+
213+
# -- F.71 — C-9 FIX1: scatter range REMOVES out-of-range points ------
214+
215+
def test_f71_scatter_range_removes_out_of_range_points(self):
216+
"""Phase 13.46.DF FIX1: scatter range= must DROP out-of-range points
217+
(point filter), not merely clip the view. Consistent with hist/profile
218+
range= excluding points from binning.
219+
220+
With explicit outliers, range='percentile_99' (and an explicit tuple)
221+
must reduce the plotted point count; range='minmax' (window == full
222+
data) must remove nothing."""
223+
rs = np.random.RandomState(0)
224+
x = np.concatenate([rs.normal(0, 1, 990),
225+
np.array([100., -100, 200, -200, 300,
226+
150, -150, 180, -180, 120])])
227+
df = pd.DataFrame({'x': x, 'y': rs.normal(0, 1, 1000),
228+
'c': rs.rand(1000)})
229+
n_in = len(df)
230+
231+
# percentile_99 drops outliers
232+
fig, ax, st = DFDraw(df).scatter('y:x', range="percentile_99")
233+
n_pct = len(ax.collections[0].get_offsets())
234+
assert n_pct < n_in, "percentile_99 must remove out-of-range points"
235+
plt.close(fig)
236+
237+
# explicit tuple drops points outside the window
238+
fig, ax, st = DFDraw(df).scatter('y:x', range=((-3, 3), (-3, 3)))
239+
n_tup = len(ax.collections[0].get_offsets())
240+
assert n_tup < n_in, "explicit-tuple range must remove out-of-range points"
241+
plt.close(fig)
242+
243+
# minmax window == full data → nothing removed
244+
df_in = df.iloc[:990] # inliers only, no extreme outliers
245+
fig, ax, st = DFDraw(df_in).scatter('y:x', range="minmax")
246+
assert len(ax.collections[0].get_offsets()) == len(df_in), \
247+
"minmax must not remove any points"
248+
plt.close(fig)
249+
250+
# parallel color array stays aligned through filtering (no length crash)
251+
fig, ax, st = DFDraw(df).scatter('y:x', color='c', range="percentile_99")
252+
assert len(ax.collections[0].get_offsets()) < n_in
253+
plt.close(fig)

0 commit comments

Comments
 (0)