@@ -1149,6 +1149,105 @@ defmodule Kernel.ParserTest do
11491149 )
11501150 end
11511151
# Verifies how the tokenizer treats carriage returns (U+000D):
#
#   * a bare CR (one not immediately followed by LF) is rejected at the top
#     level, in comments, in every quoted construct (strings, charlists,
#     heredocs, sigils, quoted atoms/keywords/calls) and after `?` / `?\`;
#   * CRLF remains a valid line ending, and the `\r` escape remains valid.
#
# Sources are built from charlists and explicit code points (13 = CR,
# 34 = `"`, 39 = `'`) so that the control character under test is visible in
# the test itself instead of being hidden inside a literal.
test "invalid bare carriage return in source" do
  # Bare CR at top level (already produces an unexpected-token error)
  assert_syntax_error(
    ["nofile:1:1:", "unexpected token: carriage return (column 1, code point U+000D)"],
    ~c"\r"
  )

  # CRLF is still a valid line ending at the top level
  assert Code.string_to_quoted!("x = 1\r\ny = 2") ==
           {:__block__, [],
            [
              {:=, [line: 1], [{:x, [line: 1], nil}, 1]},
              {:=, [line: 2], [{:y, [line: 2], nil}, 2]}
            ]}

  # Bare CR inside a comment (Trojan Source via comment camouflage)
  assert_syntax_error(
    ["nofile:1:1:", "invalid line break character in comment: \\u000D"],
    ~c"# safe comment" ++ [13] ++ ~c"hidden_code()"
  )

  # CRLF correctly terminates a comment (still valid)
  assert Code.string_to_quoted!("# comment\r\nx = 1") ==
           {:=, [line: 2], [{:x, [line: 2], nil}, 1]}

  # Bare CR inside a string.
  # The opening quote is column 1 and "this is a " is 10 characters, so the
  # CR sits at column 12.
  assert_syntax_error(
    [
      "nofile:1:12:",
      "invalid line break character in string: \\u000D. If you want to use such character, use it in its escaped \\u000D form instead"
    ],
    [34] ++ ~c"this is a " ++ [13, 34]
  )

  # Bare CR after backslash inside a string.
  # The extra backslash shifts the CR to column 13.
  assert_syntax_error(
    [
      "nofile:1:13:",
      "invalid line break character in string: \\u000D. If you want to use such character, use it in its escaped \\u000D form instead"
    ],
    [34] ++ ~c"this is a " ++ [?\\, 13, 34]
  )

  # CRLF inside a string is preserved as content (same as before)
  assert Code.string_to_quoted!([34] ++ ~c"hello" ++ [13, 10] ++ ~c"world" ++ [34]) ==
           "hello\r\nworld"

  # Bare CR inside a charlist
  assert_syntax_error(
    ["invalid line break character in string: \\u000D"],
    [39] ++ ~c"this is a " ++ [13, 39]
  )

  # Bare CR inside a heredoc
  assert_syntax_error(
    ["invalid line break character in string: \\u000D"],
    ~c"\"\"\"\nhello" ++ [13] ++ ~c"world\n\"\"\""
  )

  # Bare CR inside a sigil
  assert_syntax_error(
    ["invalid line break character in string: \\u000D"],
    ~c"~s(hello" ++ [13] ++ ~c"world)"
  )

  # Bare CR inside a quoted atom
  assert_syntax_error(
    ["invalid line break character in string: \\u000D"],
    ~c":\"foo" ++ [13] ++ ~c"bar\""
  )

  # Bare CR inside a quoted keyword
  assert_syntax_error(
    ["invalid line break character in string: \\u000D"],
    ~c"[\"foo" ++ [13] ++ ~c"bar\": 1]"
  )

  # Bare CR inside a quoted call (quoted identifier)
  assert_syntax_error(
    ["invalid line break character in string: \\u000D"],
    ~c"x.\"foo" ++ [13] ++ ~c"bar\""
  )

  # Bare CR after ? (char literal)
  assert_syntax_error(
    ["nofile:1:1:", "invalid bare carriage return after ?"],
    ~c"?" ++ [13]
  )

  # Bare CR after ?\ (char literal escape)
  assert_syntax_error(
    ["nofile:1:1:", "invalid bare carriage return after ?\\"],
    ~c"?\\" ++ [13]
  )

  # ?\r (the proper escape) is still valid
  assert Code.string_to_quoted!(~c"?\\r") == ?\r
end
1250+
11521251 test "reserved tokens" do
11531252 assert_syntax_error ( [ "nofile:1:1:" , "reserved token: __aliases__" ] , ~c" __aliases__" )
11541253 assert_syntax_error ( [ "nofile:1:1:" , "reserved token: __block__" ] , ~c" __block__" )
0 commit comments