@@ -16,12 +16,14 @@ def count_values(arr, tally: true, reverse: false)
1616 arr . value_counts ( dropna : false )
1717 end
1818
19+ sort_cache = { }
20+
1921 # sorting
2022 result = result . sort do |a , b |
2123 # compare values
2224 r = b [ 1 ] <=> a [ 1 ]
2325 # If the values are the same, compare by name
24- r = a [ 0 ] <=> b [ 0 ] if r . zero?
26+ r = natural_compare ( a [ 0 ] , b [ 0 ] , sort_cache ) if r . zero?
2527 r
2628 end
2729
@@ -31,6 +33,111 @@ def count_values(arr, tally: true, reverse: false)
3133 # prepare for barplot
3234 result . transpose
3335 end
36+
37+ # Natural order comparison for tie-breaking when counts are equal.
38+ # Fast paths handle text-only and pure numeric labels.
39+ # Mixed labels still use chunked comparison (e.g. "chr1" vs "chr10").
def natural_compare(a, b, cache = nil)
  # Returns -1, 0 or 1 ordering label `a` relative to label `b`.
  # `cache` (optional Hash) is handed to natural_sort_key so each label is
  # classified once per sort.
  #
  # NOTE(review): the numeric branch orders by Float value while the token
  # loop orders digit runs as unsigned integers, so the two can disagree for
  # signed or decimal labels ("-10" < "-5" here, yet "-5" < "-7a" < "-10" via
  # tokens). Confirm whether callers need a consistent total order.
  left = natural_sort_key(a, cache)
  right = natural_sort_key(b, cache)

  if left[:type] == :text
    # Neither label contains a digit: plain string order is sufficient.
    return left[:string] <=> right[:string] if right[:type] == :text
  elsif left[:type] == :numeric && right[:type] == :numeric
    # Both labels are pure numbers: order by value, then by spelling so that
    # equal values such as "1" and "01" still get a stable order.
    by_value = left[:numeric] <=> right[:numeric]
    return by_value.zero? ? (left[:string] <=> right[:string]) : by_value
  end

  # At least one label is mixed (or the types differ): walk the chunks pairwise.
  left_tokens = ensure_natural_tokens(left)
  right_tokens = ensure_natural_tokens(right)

  [left_tokens.size, right_tokens.size].max.times do |idx|
    l_tok = left_tokens[idx]
    r_tok = right_tokens[idx]

    # The label that runs out of chunks first sorts first.
    return -1 unless l_tok
    return 1 unless r_tok

    l_kind, l_text = l_tok
    r_kind, r_text = r_tok
    order =
      if l_kind == :num && r_kind == :num
        compare_integer_strings(l_text, r_text)
      else
        l_text <=> r_text
      end

    return order unless order.zero?
  end

  # Every chunk compared equal; fall back to the raw strings.
  left[:string] <=> right[:string]
end
79+
80+ # Classifies a value for natural sorting and caches the result per label.
def natural_sort_key(value, cache = nil)
  # Builds the classification Hash used by natural_compare for one label.
  # `value` is converted with to_s; when `cache` is given, the Hash is stored
  # under that string and returned as-is on later calls.
  label = value.to_s
  return cache[label] if cache && cache.key?(label)

  entry =
    if !label.match?(/\d/)
      # No digit anywhere: text-only label.
      { type: :text, string: label, tokens: nil }
    elsif (number = parse_numeric(label))
      # The whole label parses as a number.
      { type: :numeric, string: label, numeric: number }
    else
      # Digits mixed with other characters; tokens are filled in lazily.
      { type: :mixed, string: label, tokens: nil }
    end

  cache[label] = entry if cache
  entry
end
101+
102+ # Memoizes token pairs for fallback chunked comparison.
def ensure_natural_tokens(key)
  # Returns key[:tokens], computing it from key[:string] and storing it on the
  # Hash the first time it is requested.
  existing = key[:tokens]
  return existing if existing

  key[:tokens] = natural_tokens(key[:string])
end
106+
107+ # Parses a string as a numeric value if it matches pure number format.
108+ # Returns Float or nil.
def parse_numeric(str)
  # Accepts an optional sign followed by digits with an optional fractional
  # part, or a bare fraction such as ".5". Anything else yields nil.
  numeric_format = /\A[+-]?(?:\d+(?:\.\d+)?|\.\d+)\z/
  str.to_f if str.match?(numeric_format)
end
114+
115+ # Splits a string into [type, token] pairs for natural comparison.
116+ # Type is :num for digit-only chunks, :text for anything else.
117+ # E.g. "chr10" => [[:text, "chr"], [:num, "10"]]
def natural_tokens(str)
  # Cuts the string into alternating digit and non-digit runs and tags each
  # run, e.g. "chr10" => [[:text, "chr"], [:num, "10"]].
  chunks = str.scan(/\d+|\D+/)
  chunks.map do |chunk|
    digits_only = chunk.match?(/\A\d+\z/)
    [digits_only ? :num : :text, chunk]
  end
end
124+
125+ # Compares two numeric strings, handling leading zeros.
126+ # Order: by length (sans leading zeros), then numeric value, then original.
def compare_integer_strings(a, b)
  # Orders two digit-only strings by magnitude without converting them to
  # integers. Leading zeros are ignored for the magnitude check and only
  # matter as the last tiebreaker.
  core_a, core_b = [a, b].map do |digits|
    stripped = digits.sub(/\A0+/, '')
    stripped.empty? ? '0' : stripped
  end

  # More significant digits means a larger number; equal lengths compare
  # digit by digit; equal values fall back to the original spelling.
  (core_a.length <=> core_b.length).nonzero? ||
    (core_a <=> core_b).nonzero? ||
    (a <=> b)
end
34141 end
35142 end
36143end
0 commit comments