Skip to content

Commit cda0eb7

Browse files
authored
Merge pull request #2950 from ksss/buffer-character-offset-cache
Speed up RBS::InlineParser on large sources with non-ASCII characters
2 parents ddd0a1f + 6a0f671 commit cda0eb7

4 files changed

Lines changed: 74 additions & 17 deletions

File tree

lib/rbs/ast/ruby/comment_block.rb

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def initialize(source_buffer, comments)
1515
prefix_str = "# "
1616

1717
ranges = [] #: Array[Range[Integer]]
18+
byte_ranges = [] #: Array[Range[Integer]]
1819

1920
comments.each do |comment|
2021
tuple = [comment, 2] #: [Prism::Comment, Integer]
@@ -25,12 +26,13 @@ def initialize(source_buffer, comments)
2526

2627
offsets << tuple
2728

28-
start_char = comment.location.start_character_offset + tuple[1]
29-
end_char = comment.location.end_character_offset
29+
start_char = source_buffer.character_offset(comment.location.start_offset) + tuple[1]
30+
end_char = source_buffer.character_offset(comment.location.end_offset)
3031
ranges << (start_char ... end_char)
32+
byte_ranges << ((comment.location.start_offset + tuple[1]) ... comment.location.end_offset)
3133
end
3234

33-
@comment_buffer = source_buffer.sub_buffer(lines: ranges)
35+
@comment_buffer = source_buffer.sub_buffer(lines: ranges, byte_lines_hint: byte_ranges)
3436
end
3537

3638
def leading?
@@ -53,7 +55,7 @@ def end_line
5355

5456
def line_starts
5557
offsets.map do |comment, prefix_size|
56-
comment.location.start_character_offset + prefix_size
58+
comment_buffer.character_offset(comment.location.start_offset) + prefix_size
5759
end
5860
end
5961

lib/rbs/ast/ruby/helpers/location_helper.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ module Ruby
66
module Helpers
77
module LocationHelper
88
def rbs_location(location)
9-
Location.new(buffer, location.start_character_offset, location.end_character_offset)
9+
Location.new(buffer, buffer.character_offset(location.start_offset), buffer.character_offset(location.end_offset))
1010
end
1111
end
1212
end

lib/rbs/buffer.rb

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,25 +87,62 @@ def inspect
8787
"#<RBS::Buffer:#{__id__} @name=#{name}, @content=#{content.bytesize} bytes, @lines=#{ranges.size} lines,>"
8888
end
8989

90+
def character_offset(byte_offset)
91+
top = top_buffer
92+
return top.character_offset(byte_offset) unless top.equal?(self)
93+
94+
keys, vals = (@character_offset_cache ||= [[0], [0]])
95+
96+
idx = keys.bsearch_index { |k| k > byte_offset }
97+
lo = idx ? idx - 1 : keys.size - 1
98+
99+
base_byte = keys[lo]
100+
base_char = vals[lo]
101+
delta = byte_offset - base_byte
102+
return base_char if delta == 0
103+
104+
result = base_char + (content.byteslice(base_byte, delta) or raise).length
105+
106+
if base_byte == keys[-1]
107+
keys << byte_offset
108+
vals << result
109+
end
110+
111+
result
112+
end
113+
90114
def rbs_location(location, loc2=nil)
115+
top = top_buffer
91116
if loc2
92-
Location.new(self.top_buffer, location.start_character_offset, loc2.end_character_offset)
117+
Location.new(top, character_offset(location.start_offset), character_offset(loc2.end_offset))
93118
else
94-
Location.new(self.top_buffer, location.start_character_offset, location.end_character_offset)
119+
Location.new(top, character_offset(location.start_offset), character_offset(location.end_offset))
95120
end
96121
end
97122

98-
def sub_buffer(lines:)
123+
def sub_buffer(lines:, byte_lines_hint: nil)
99124
buf = +""
100-
lines.each_with_index do |range, index|
101-
start_pos = range.begin
102-
end_pos = range.end
103-
slice = content[start_pos...end_pos] or raise
104-
if slice.include?("\n")
105-
raise "Line #{index + 1} cannot contain newline character."
125+
126+
if byte_lines_hint
127+
byte_lines_hint.each_with_index do |range, index|
128+
slice = content.byteslice(range.begin, range.end - range.begin) or raise
129+
if slice.include?("\n")
130+
raise "Line #{index + 1} cannot contain newline character."
131+
end
132+
buf << slice
133+
buf << "\n"
134+
end
135+
else
136+
lines.each_with_index do |range, index|
137+
start_pos = range.begin
138+
end_pos = range.end
139+
slice = content[start_pos...end_pos] or raise
140+
if slice.include?("\n")
141+
raise "Line #{index + 1} cannot contain newline character."
142+
end
143+
buf << slice
144+
buf << "\n"
106145
end
107-
buf << slice
108-
buf << "\n"
109146
end
110147

111148
buf.chomp!

sig/buffer.rbs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,19 @@ module RBS
6060
def rbs_location: (Prism::Location) -> Location
6161
| (Prism::Location, Prism::Location) -> Location
6262

63+
# Translate a byte offset (into the top buffer's source) to a character offset.
64+
#
65+
# Resolution is delegated to the top buffer, which keeps a sparse cache of
66+
# resolved (byte, char) pairs so successive calls scan only the delta from the
67+
# nearest preceding cached pair. The cache is extended only by queries past its last
68+
# entry, so the O(content_size) total assumes roughly ascending offsets; each call is one byteslice + length.
69+
#
70+
def character_offset: (Integer byte_offset) -> Integer
71+
72+
# Sparse cache backing `#character_offset`: a pair `[byte_keys, char_values]`
73+
# kept in ascending byte order to support binary search.
74+
@character_offset_cache: [Array[Integer], Array[Integer]]?
75+
6376
# Construct a buffer from substrings of this buffer.
6477
#
6578
# The returned buffer contains lines from given ranges.
@@ -75,7 +88,12 @@ module RBS
7588
# buffer.sub_buffer(lines: [5..7]) # => Raises an error because the range contains newline
7689
# ```
7790
#
78-
%a{pure} def sub_buffer: (lines: Array[Range[Integer]]) -> Buffer
91+
# `byte_lines_hint:` is an optional performance hint: byte ranges corresponding
92+
# to `lines:`. When provided, slicing uses `byteslice` (O(slice_size) per line)
93+
# instead of `content[char_range]`, which on a multi-byte string is O(content_size)
94+
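# per call. The result is identical only if each byte range spans exactly the same text as the matching `lines:` range; the slicing does not cross-check the two.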
95+
#
96+
%a{pure} def sub_buffer: (lines: Array[Range[Integer]], ?byte_lines_hint: Array[Range[Integer]]?) -> Buffer
7997

8098
%a{pure} def parent_buffer: () -> Buffer?
8199

0 commit comments

Comments
 (0)