Skip to content

Commit 9f00f56

Browse files
committed
Fix natural sorting for count labels
Fix #62 Reported by @forthrin Thanks!
1 parent 3398504 commit 9f00f56

3 files changed

Lines changed: 185 additions & 5 deletions

File tree

lib/youplot/backends/processing.rb

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ def count_values(arr, tally: true, reverse: false)
1616
arr.value_counts(dropna: false)
1717
end
1818

19+
sort_cache = {}
20+
1921
# sorting
2022
result = result.sort do |a, b|
2123
# compare values
2224
r = b[1] <=> a[1]
2325
# If the values are the same, compare by name
24-
r = a[0] <=> b[0] if r.zero?
26+
r = natural_compare(a[0], b[0], sort_cache) if r.zero?
2527
r
2628
end
2729

@@ -31,6 +33,111 @@ def count_values(arr, tally: true, reverse: false)
3133
# prepare for barplot
3234
result.transpose
3335
end
36+
37+
# Three-way, <=>-style comparison that orders two labels "naturally".
# Used as the tie-breaker when two labels have the same count.
#
# a, b  - labels to compare; each is classified via natural_sort_key.
# cache - optional Hash passed through to natural_sort_key so a label that is
#         compared many times is only classified once.
#
# Two text-only labels are compared as plain strings. Two pure-number labels
# are compared by parsed value first and by raw string second. Every other
# pairing is compared token by token (e.g. "chr1" vs "chr10").
def natural_compare(a, b, cache = nil)
  left = natural_sort_key(a, cache)
  right = natural_sort_key(b, cache)
  left_kind = left[:type]
  right_kind = right[:type]

  # No digits on either side: ordinary string order is already natural.
  if left_kind == :text && right_kind == :text
    return left[:string] <=> right[:string]
  end

  if left_kind == :numeric && right_kind == :numeric
    by_value = left[:numeric] <=> right[:numeric]
    # Same value spelled differently (e.g. "1" and "01"): fall back to the text.
    return by_value.zero? ? (left[:string] <=> right[:string]) : by_value
  end

  # At least one side mixes text and digits: walk the token lists in step.
  left_tokens = ensure_natural_tokens(left)
  right_tokens = ensure_natural_tokens(right)

  left_tokens.each_with_index do |(l_kind, l_text), idx|
    other = right_tokens[idx]
    # The right label ran out of tokens first, so the left one sorts after it.
    return 1 if other.nil?

    r_kind, r_text = other
    outcome =
      if l_kind == :num && r_kind == :num
        compare_integer_strings(l_text, r_text)
      else
        l_text <=> r_text
      end
    return outcome unless outcome.zero?
  end

  # Every left token matched but the right label has more: left sorts first.
  return -1 if left_tokens.size < right_tokens.size

  left[:string] <=> right[:string]
end
79+
80+
# Builds the sort key Hash that describes a label for natural sorting.
#
# value - any object; its to_s form is what gets classified.
# cache - optional Hash keyed by that string. A hit is returned as-is and a
#         freshly built key is stored before being returned.
#
# The returned Hash always carries :type and :string:
#   :text    - the string contains no digit,
#   :numeric - parse_numeric accepted the whole string (value under :numeric),
#   :mixed   - the string contains digits but is not a pure number.
# :text and :mixed keys start with tokens: nil.
def natural_sort_key(value, cache = nil)
  label = value.to_s
  if cache && cache.key?(label)
    return cache[label]
  end

  entry =
    if !label.match?(/\d/)
      { type: :text, string: label, tokens: nil }
    elsif (number = parse_numeric(label))
      { type: :numeric, string: label, numeric: number }
    else
      { type: :mixed, string: label, tokens: nil }
    end

  cache[label] = entry if cache
  entry
end
101+
102+
# Returns the token list for a sort key, computing it on first use.
#
# key - Hash as built by natural_sort_key. The result of
#       natural_tokens(key[:string]) is stored under key[:tokens] so later
#       calls with the same Hash reuse it.
def ensure_natural_tokens(key)
  known = key[:tokens]
  return known if known

  key[:tokens] = natural_tokens(key[:string])
end
106+
107+
# Parses a string as a number if the whole string is in plain decimal format:
# an optional sign, then digits with an optional fractional part, or a bare
# fraction such as ".5".
#
# str - the label text to parse.
#
# Returns an exact Rational, or nil when the string is not a pure number.
#
# The value is built from the digits instead of going through String#to_f:
# Float rounding makes distinct long labels compare as equal (for example
# "99999999999999999999" and "100000000000000000000"), which would then be
# ordered by a string tie-break that gets them the wrong way round.
def parse_numeric(str)
  return nil unless str.match?(/\A[+-]?(?:\d+(?:\.\d+)?|\.\d+)\z/)

  # The pattern above only allows a sign at the very start.
  integer_part, fraction = str.sub(/\A[+-]/, '').split('.', 2)
  fraction ||= ''

  # "12.50" becomes 1250 / 10**2; a bare ".5" has an empty integer part.
  magnitude = Rational("#{integer_part}#{fraction}".to_i, 10**fraction.length)
  str.start_with?('-') ? -magnitude : magnitude
end
114+
115+
# Breaks a string into alternating runs of digits and non-digits.
#
# str - the label text to split.
#
# Returns an Array of [kind, chunk] pairs in their original order, where kind
# is :num for a run made only of digits and :text for any other run.
# Example: "chr10" => [[:text, "chr"], [:num, "10"]]
def natural_tokens(str)
  str.scan(/\d+|\D+/).each_with_object([]) do |chunk, pairs|
    pairs << (chunk.match?(/\A\d+\z/) ? [:num, chunk] : [:text, chunk])
  end
end
124+
125+
# Three-way comparison of two digit strings that may carry leading zeros.
#
# a, b - strings to compare (the caller passes digit-only tokens).
#
# Leading zeros are stripped first (an all-zero string becomes "0"). The
# stripped forms are ranked by length, then by their text, which for
# equal-length digit strings matches numeric order. If they are identical, the
# untouched originals decide, so "01" and "1" still get a stable order.
def compare_integer_strings(a, b)
  bare_a, bare_b = [a, b].map do |digits|
    stripped = digits.sub(/\A0+/, '')
    stripped.empty? ? '0' : stripped
  end

  # Array#<=> returns the first non-zero element-wise result, i.e. the same
  # length -> value -> original cascade.
  [bare_a.length, bare_a, a] <=> [bare_b.length, bare_b, b]
end
34141
end
35142
end
36143
end

test/fixtures/simple-count.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
┌ ┐
2-
-10 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
3-
-20 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
4-
-30 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
5-
-40 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
62
-50 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
3+
-40 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
4+
-30 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
5+
-20 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
6+
-10 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
77
10 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
88
20 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0
99
30 ┤■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1.0

test/youplot/backends/processing_test.rb

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,79 @@ class ProcessingTest < Test::Unit::TestCase
99
assert_equal([%i[c b a], [3, 2, 1]], @m.count_values(%i[a b b c c c]))
1010
end
1111

12+
# Three integer labels that each occur once are expected back in ascending
# numeric order: 19, 187, 1765.
test :count_values_natural_sort_integer_labels do
13+
@m = YouPlot::Backends::Processing
14+
assert_equal([%w[19 187 1765], [1, 1, 1]], @m.count_values(%w[1765 187 19]))
15+
end
16+
17+
# Labels with a letter prefix and a number are expected to be ordered by the
# number: a1, a2, a10.
test :count_values_natural_sort_alnum_labels do
18+
@m = YouPlot::Backends::Processing
19+
assert_equal([%w[a1 a2 a10], [1, 1, 1]], @m.count_values(%w[a10 a2 a1]))
20+
end
21+
22+
# Signed numeric labels are expected in ascending numeric order, negatives
# first: -20, -10, 10.
test :count_values_natural_sort_negative_numeric_labels do
23+
@m = YouPlot::Backends::Processing
24+
assert_equal([%w[-20 -10 10], [1, 1, 1]], @m.count_values(%w[-10 10 -20]))
25+
end
26+
27+
# "chrN" labels are expected to be ordered by their numeric suffix:
# chr1, chr2, chr10, chr11, chr12.
test :count_values_natural_sort_chr_labels do
28+
@m = YouPlot::Backends::Processing
29+
assert_equal(
30+
[%w[chr1 chr2 chr10 chr11 chr12], [1, 1, 1, 1, 1]],
31+
@m.count_values(%w[chr12 chr1 chr11 chr10 chr2])
32+
)
33+
end
34+
35+
# Labels without any digit are expected in plain string order:
# chrM, chrX, chrY.
test :count_values_natural_sort_text_only_labels do
36+
@m = YouPlot::Backends::Processing
37+
assert_equal([%w[chrM chrX chrY], [1, 1, 1]], @m.count_values(%w[chrY chrX chrM]))
38+
end
39+
40+
# "01" and "1" have the same numeric value; the expected order puts "01"
# before "1", and both before "2".
test :count_values_natural_sort_leading_zeros do
41+
@m = YouPlot::Backends::Processing
42+
assert_equal([%w[01 1 2], [1, 1, 1]], @m.count_values(%w[2 1 01]))
43+
end
44+
45+
# With pure numbers and a text-only label in the same input, the numbers are
# expected first in numeric order, then the text: 2, 10, abc.
test :count_values_natural_sort_mixed_numeric_and_text do
46+
@m = YouPlot::Backends::Processing
47+
assert_equal([%w[2 10 abc], [1, 1, 1]], @m.count_values(%w[abc 10 2]))
48+
end
49+
50+
# A label that starts with a number and continues with text ("2a") is
# expected between the pure numbers it sits between: 2, 2a, 10.
test :count_values_natural_sort_numeric_and_mixed_labels do
51+
@m = YouPlot::Backends::Processing
52+
assert_equal([%w[2 2a 10], [1, 1, 1]], @m.count_values(%w[10 2a 2]))
53+
end
54+
55+
# Dotted IPv4-style labels are expected to be ordered segment by segment with
# each segment compared as a number: ...0.2, ...0.10, ...1.1.
test :count_values_natural_sort_ipv4_labels do
56+
@m = YouPlot::Backends::Processing
57+
assert_equal(
58+
[%w[192.168.0.2 192.168.0.10 192.168.1.1], [1, 1, 1]],
59+
@m.count_values(%w[192.168.1.1 192.168.0.10 192.168.0.2])
60+
)
61+
end
62+
63+
# Version-style labels are expected to be ordered component by component:
# 1.2.3, 1.2.10, 1.10.0.
test :count_values_natural_sort_version_labels do
64+
@m = YouPlot::Backends::Processing
65+
assert_equal(
66+
[%w[1.2.3 1.2.10 1.10.0], [1, 1, 1]],
67+
@m.count_values(%w[1.10.0 1.2.10 1.2.3])
68+
)
69+
end
70+
71+
# The count is expected to take priority over the label: the most frequent
# label comes first, and only the labels tied on count are ordered naturally.
test :count_values_mixed_counts_with_ties do
72+
@m = YouPlot::Backends::Processing
73+
# "a" appears 3 times (top), then "chr1" and "chr10" tie at 1 each
74+
assert_equal(
75+
[%w[a chr1 chr10], [3, 1, 1]],
76+
@m.count_values(%w[a a a chr10 chr1])
77+
)
78+
end
79+
80+
# With reverse: true, the three tied integer labels are expected in
# descending order: 1765, 187, 19.
test :count_values_reverse_preserves_semantics do
81+
@m = YouPlot::Backends::Processing
82+
assert_equal([%w[1765 187 19], [1, 1, 1]], @m.count_values(%w[1765 187 19], reverse: true))
83+
end
84+
1285
test :count_values_non_tally do
1386
@m = YouPlot::Backends::Processing
1487
assert_equal([%i[a b c], [3, 2, 1]], @m.count_values(%i[a a a b b c], tally: false))

0 commit comments

Comments
 (0)