Skip to content

Commit 71914ff

Browse files
committed
Fix File.Stream Enumerable.count for files without trailing newline
The optimized count implementation was counting newline characters instead of actual lines, returning incorrect results for files that don't end with a newline (which is common for source code files). For example, a file with content "line1\nline2\nline3" (3 lines) was returning 2 instead of 3. The fix tracks the last byte read and adds 1 at EOF if the file has content but doesn't end with a newline.
1 parent d61ba91 commit 71914ff

2 files changed

Lines changed: 41 additions & 6 deletions

File tree

lib/elixir/lib/file/stream.ex

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,7 @@ defmodule File.Stream do
119119

120120
counter = fn device ->
121121
device = skip_bom_and_offset(device, raw, modes)
122-
count_lines(device, path, pattern, read_function(stream), 0)
122+
count_lines(device, path, pattern, read_function(stream), 0, :empty)
123123
end
124124

125125
{:ok, open!(stream, modes, counter)}
@@ -229,21 +229,28 @@ defmodule File.Stream do
229229
for mode <- modes, mode not in [:write, :append, :trim_bom], do: mode
230230
end
231231

232-
defp count_lines(device, path, pattern, read, count) do
232+
defp count_lines(device, path, pattern, read, count, last_byte) do
233233
case read.(device) do
234+
data when is_binary(data) and byte_size(data) > 0 ->
235+
newlines = length(:binary.matches(data, pattern))
236+
last = :binary.last(data)
237+
count_lines(device, path, pattern, read, count + newlines, last)
238+
234239
data when is_binary(data) ->
235-
count_lines(device, path, pattern, read, count + count_lines(data, pattern))
240+
count_lines(device, path, pattern, read, count, last_byte)
236241

237242
:eof ->
238-
count
243+
case last_byte do
244+
:empty -> 0
245+
?\n -> count
246+
_ -> count + 1
247+
end
239248

240249
{:error, reason} ->
241250
raise File.Error, reason: reason, action: "stream", path: path
242251
end
243252
end
244253

245-
defp count_lines(data, pattern), do: length(:binary.matches(data, pattern))
246-
247254
defp read_function(%{raw: true}), do: &IO.binread(&1, @read_ahead_size)
248255
defp read_function(%{raw: false}), do: &IO.read(&1, @read_ahead_size)
249256
end

lib/elixir/test/elixir/file/stream_test.exs

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,34 @@ defmodule File.StreamTest do
6565
assert Enum.count(stream) == 2
6666
end
6767

68+
test "counts lines without trailing newline" do
69+
no_trailing = tmp_path("no_trailing.txt")
70+
single_line = tmp_path("single_line.txt")
71+
empty_file = tmp_path("empty.txt")
72+
73+
try do
74+
File.write!(no_trailing, "line1\nline2\nline3")
75+
File.write!(single_line, "hello")
76+
File.write!(empty_file, "")
77+
78+
# 3 lines, no trailing newline
79+
stream = stream!(@node, no_trailing)
80+
assert Enum.count(stream) == 3
81+
82+
# 1 line, no newline at all
83+
stream = stream!(@node, single_line)
84+
assert Enum.count(stream) == 1
85+
86+
# empty file
87+
stream = stream!(@node, empty_file)
88+
assert Enum.count(stream) == 0
89+
after
90+
File.rm(no_trailing)
91+
File.rm(single_line)
92+
File.rm(empty_file)
93+
end
94+
end
95+
6896
test "reads and writes lines" do
6997
src = fixture_path("file.txt")
7098
dest = tmp_path("tmp_test.txt")

0 commit comments

Comments
 (0)