@@ -138,12 +138,30 @@ def _read_lines_reversed(
138138 filepath : Path , start_offset : Optional [int ] = None , chunk_size : int = 8192
139139 ) -> Generator [Tuple [bytes , int ], None , None ]:
140140 """
141- A memory-efficient generator that yields lines from a file in reverse order with their start byte offsets.
142- It always starts at the file’s end, so reading top lines of large files may be slow .
141+ A generator that yields lines from a file in reverse order, along with the byte
142+ offset of the start of each line. This is memory-efficient for large files.
143143 """
144144 with open (filepath , "rb" ) as f :
145145 f .seek (0 , os .SEEK_END )
146- cursor = f .tell ()
146+ file_size = f .tell ()
147+ cursor = file_size
148+
149+ # If a start_offset is provided, optimize by starting the read
150+ # from a more specific location instead of the end of the file.
151+ if start_offset is not None and start_offset < file_size :
152+ # To get the full content of the line that straddles the offset,
153+ # we need to find its end (the next newline character).
154+ f .seek (start_offset )
155+ chunk = f .read (chunk_size )
156+ newline_pos = chunk .find (b"\n " )
157+ if newline_pos != - 1 :
158+ # Found the end of the line. The cursor for reverse reading
159+ # should start from this point to include the full line.
160+ cursor = start_offset + newline_pos + 1
161+ else :
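162+ # No newline within the next chunk_size bytes: either the rest of the file is a single line, or the current line is longer than one chunk.
163+ # Falling back to the default cursor (file_size) is still correct, but the start_offset optimization is skipped in this case.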
164+ pass
147165
148166 buffer = b""
149167
0 commit comments