Skip to content

Commit 76d3fba

Browse files
authored
Disallow raw CR line ending in strings, comments and after ? (#15307)
1 parent b8a9dc6 commit 76d3fba

4 files changed

Lines changed: 115 additions & 0 deletions

File tree

lib/elixir/src/elixir_interpolation.erl

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,9 @@ extract([$\\, $\r, $\n | Rest], Buffer, Output, Line, _Column, Scope, Interpol,
3333
extract([$\\, $\n | Rest], Buffer, Output, Line, _Column, Scope, Interpol, Last) ->
3434
extract_nl(Rest, [$\n, $\\ | Buffer], Output, Line, Scope, Interpol, Last);
3535

36+
extract([$\r, $\n | Rest], Buffer, Output, Line, _Column, Scope, Interpol, Last) ->
37+
extract_nl(Rest, [$\n, $\r | Buffer], Output, Line, Scope, Interpol, Last);
38+
3639
extract([$\n | Rest], Buffer, Output, Line, _Column, Scope, Interpol, Last) ->
3740
extract_nl(Rest, [$\n | Buffer], Output, Line, Scope, Interpol, Last);
3841

lib/elixir/src/elixir_tokenizer.erl

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -216,6 +216,11 @@ tokenize([$~, H | _T] = Original, Line, Column, Scope, Tokens) when ?is_upcase(H
216216
% elixir_errors.erl as by default {char, _, _} tokens are "hijacked" by Erlang
217217
% and printed with Erlang syntax ($a) in the parser's error messages.
218218

219+
%% Reject bare carriage return after ?\ (CR is only valid as part of CRLF line ending).
220+
tokenize([$?, $\\, $\r | _Rest] = Original, Line, Column, Scope, Tokens) ->
221+
Reason = {?LOC(Line, Column), "invalid bare carriage return after ?\\, use ?\\r instead: ", "\\u000D"},
222+
error(Reason, Original, Scope, Tokens);
223+
219224
tokenize([$?, $\\, H | T], Line, Column, Scope, Tokens) ->
220225
Char = elixir_interpolation:unescape_map(H),
221226

@@ -248,6 +253,11 @@ tokenize([$?, $\\, H | T], Line, Column, Scope, Tokens) ->
248253
tokenize(T, Line, Column + 3, NewScope, [Token | Tokens])
249254
end;
250255

256+
%% Reject bare carriage return after ? (CR is only valid as part of CRLF line ending).
257+
tokenize([$?, $\r | _Rest] = Original, Line, Column, Scope, Tokens) ->
258+
Reason = {?LOC(Line, Column), "invalid bare carriage return after ?, use ?\\r instead: ", "\\u000D"},
259+
error(Reason, Original, Scope, Tokens);
260+
251261
tokenize([$?, Char | T], Line, Column, Scope, Tokens) ->
252262
NewScope = case handle_char(Char) of
253263
{Escape, Name} ->

lib/elixir/src/elixir_tokenizer.hrl

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,11 @@
3636

3737
%% Unsupported newlines
3838
%% https://www.unicode.org/reports/tr55/
39+
%% CR (0x000D) is included: a bare CR not followed by LF is rejected.
40+
%% Call sites must handle CRLF explicitly before the ?break check.
3941
-define(break(C), C =:= 16#000B orelse
4042
C =:= 16#000C orelse
43+
C =:= 16#000D orelse
4144
C =:= 16#0085 orelse
4245
C =:= 16#2028 orelse
4346
C =:= 16#2029).

lib/elixir/test/elixir/kernel/parser_test.exs

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1149,6 +1149,105 @@ defmodule Kernel.ParserTest do
11491149
)
11501150
end
11511151

1152+
test "invalid bare carriage return in source" do
1153+
# Bare CR at top level (already produces an unexpected-token error)
1154+
assert_syntax_error(
1155+
["nofile:1:1:", "unexpected token: carriage return (column 1, code point U+000D)"],
1156+
~c"\r"
1157+
)
1158+
1159+
# CRLF is still a valid line ending at the top level
1160+
assert Code.string_to_quoted!("x = 1\r\ny = 2") ==
1161+
{:__block__, [],
1162+
[
1163+
{:=, [line: 1], [{:x, [line: 1], nil}, 1]},
1164+
{:=, [line: 2], [{:y, [line: 2], nil}, 2]}
1165+
]}
1166+
1167+
# Bare CR inside a comment (Trojan Source via comment camouflage)
1168+
assert_syntax_error(
1169+
["nofile:1:1:", "invalid line break character in comment: \\u000D"],
1170+
~c"# safe comment" ++ [13] ++ ~c"hidden_code()"
1171+
)
1172+
1173+
# CRLF correctly terminates a comment (still valid)
1174+
assert Code.string_to_quoted!("# comment\r\nx = 1") ==
1175+
{:=, [line: 2], [{:x, [line: 2], nil}, 1]}
1176+
1177+
# Bare CR inside a string
1178+
assert_syntax_error(
1179+
[
1180+
"nofile:1:12:",
1181+
"invalid line break character in string: \\u000D. If you want to use such character, use it in its escaped \\u000D form instead"
1182+
],
1183+
[34] ++ ~c"this is a " ++ [13, 34]
1184+
)
1185+
1186+
# Bare CR after backslash inside a string
1187+
assert_syntax_error(
1188+
[
1189+
"nofile:1:13:",
1190+
"invalid line break character in string: \\u000D. If you want to use such character, use it in its escaped \\u000D form instead"
1191+
],
1192+
[34] ++ ~c"this is a " ++ [?\\, 13, 34]
1193+
)
1194+
1195+
# CRLF inside a string is preserved as content (same as before)
1196+
assert Code.string_to_quoted!([34] ++ ~c"hello" ++ [13, 10] ++ ~c"world" ++ [34]) ==
1197+
"hello\r\nworld"
1198+
1199+
# Bare CR inside a charlist
1200+
assert_syntax_error(
1201+
["invalid line break character in string: \\u000D"],
1202+
[39] ++ ~c"this is a " ++ [13, 39]
1203+
)
1204+
1205+
# Bare CR inside a heredoc
1206+
assert_syntax_error(
1207+
["invalid line break character in string: \\u000D"],
1208+
~c"\"\"\"\nhello" ++ [13] ++ ~c"world\n\"\"\""
1209+
)
1210+
1211+
# Bare CR inside a sigil
1212+
assert_syntax_error(
1213+
["invalid line break character in string: \\u000D"],
1214+
~c"~s(hello" ++ [13] ++ ~c"world)"
1215+
)
1216+
1217+
# Bare CR inside a quoted atom
1218+
assert_syntax_error(
1219+
["invalid line break character in string: \\u000D"],
1220+
~c":\"foo" ++ [13] ++ ~c"bar\""
1221+
)
1222+
1223+
# Bare CR inside a quoted keyword
1224+
assert_syntax_error(
1225+
["invalid line break character in string: \\u000D"],
1226+
~c"[\"foo" ++ [13] ++ ~c"bar\": 1]"
1227+
)
1228+
1229+
# Bare CR inside a quoted call (quoted identifier)
1230+
assert_syntax_error(
1231+
["invalid line break character in string: \\u000D"],
1232+
~c"x.\"foo" ++ [13] ++ ~c"bar\""
1233+
)
1234+
1235+
# Bare CR after ? (char literal)
1236+
assert_syntax_error(
1237+
["nofile:1:1:", "invalid bare carriage return after ?"],
1238+
~c"?" ++ [13]
1239+
)
1240+
1241+
# Bare CR after ?\ (char literal escape)
1242+
assert_syntax_error(
1243+
["nofile:1:1:", "invalid bare carriage return after ?\\"],
1244+
~c"?\\" ++ [13]
1245+
)
1246+
1247+
# ?\r (the proper escape) is still valid
1248+
assert Code.string_to_quoted!(~c"?\\r") == ?\r
1249+
end
1250+
11521251
test "reserved tokens" do
11531252
assert_syntax_error(["nofile:1:1:", "reserved token: __aliases__"], ~c"__aliases__")
11541253
assert_syntax_error(["nofile:1:1:", "reserved token: __block__"], ~c"__block__")

0 commit comments

Comments
 (0)