Skip to content

Commit 219382f

Browse files
author
miranov25
committed
Fix: interval label sorting for negative ranges
- `_interval_sort_key()` extracts the left boundary from labels like '-0.39--0.00'
- Finds the separator dash (the first '-' after a digit), not the negative sign
- Tuple key (priority, value) handles numbers, intervals, and strings
- Fixes wrong legend order for negative group_by_quantiles ranges
- 58/58 tests passing, no regressions
1 parent 0e9da40 commit 219382f

1 file changed

Lines changed: 45 additions & 10 deletions

File tree

UTILS/dfextensions/dfdraw/plots/profile.py

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,48 @@ def _format_interval_label(interval) -> str:
6363
return f"{interval.left:{fmt}}-{interval.right:{fmt}}"
6464

6565

66+
def _interval_sort_key(label):
67+
"""
68+
Sort key for group labels: plain numbers, interval labels, or strings.
69+
70+
Returns a tuple (priority, value) so numbers sort before strings:
71+
- (0, float) for numeric labels and interval left boundaries
72+
- (1, str) for non-numeric string labels
73+
74+
Handles interval labels like '0.01-0.40' or '-0.39--0.00' by extracting
75+
the left boundary. NaN sorts to end.
76+
77+
Parameters
78+
----------
79+
label : str or any
80+
Group label to sort.
81+
82+
Returns
83+
-------
84+
tuple
85+
(priority, sort_value) for consistent ordering.
86+
"""
87+
if pd.isna(label):
88+
return (2, 0)
89+
s = str(label)
90+
# Try plain number first
91+
try:
92+
return (0, float(s))
93+
except ValueError:
94+
pass
95+
# Interval label: left boundary is before the separator dash.
96+
# The separator dash is the first '-' that follows a digit.
97+
# For "-0.39--0.00": skip leading '-' (negative sign), find next '-' after digit.
98+
try:
99+
for i in range(1, len(s)):
100+
if s[i] == '-' and s[i-1].isdigit():
101+
return (0, float(s[:i]))
102+
except (ValueError, IndexError):
103+
pass
104+
# String fallback — alphabetical
105+
return (1, s)
106+
107+
66108
def draw_profile(
67109
df: pd.DataFrame,
68110
x: Union[str, pd.Series, np.ndarray],
@@ -488,24 +530,17 @@ def _draw_profile_grouped(
488530
groups = df[group_by].unique()
489531

490532
# Phase 13.12.DF F4: Sort groups
533+
# Phase 13.14.DF: Use _interval_sort_key for correct negative interval sorting
491534
if sort_groups:
492-
try:
493-
# Try numeric sort first
494-
groups = sorted(groups, key=lambda x: float(x) if not pd.isna(x) else float('inf'))
495-
except (ValueError, TypeError):
496-
# Fall back to string sort
497-
groups = sorted(groups, key=str)
535+
groups = sorted(groups, key=_interval_sort_key)
498536

499537
# Top-K filtering
500538
if top_k is not None and len(groups) > top_k:
501539
counts = df[group_by].value_counts()
502540
top_groups = counts.head(top_k).index.tolist()
503541
# Preserve sort order
504542
if sort_groups:
505-
try:
506-
top_groups = sorted(top_groups, key=lambda x: float(x) if not pd.isna(x) else float('inf'))
507-
except (ValueError, TypeError):
508-
top_groups = sorted(top_groups, key=str)
543+
top_groups = sorted(top_groups, key=_interval_sort_key)
509544
groups = top_groups
510545

511546
# Color palette

0 commit comments

Comments
 (0)