Skip to content

Commit 7b6b934

Browse files
l46kok authored and copybara-github committed
Reject invalid unicode literals in the parser
PiperOrigin-RevId: 893620376
1 parent 4d00593 commit 7b6b934

File tree

3 files changed

+36
-1
lines changed

3 files changed

+36
-1
lines changed

common/src/main/java/dev/cel/common/internal/Constants.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,9 @@ private static <T> void decodeString(
207207
continue;
208208
}
209209
skipNewline = false;
210+
if (codePoint >= MIN_SURROGATE && codePoint <= MAX_SURROGATE) {
211+
throw new ParseException("Invalid unicode code point", seqOffset);
212+
}
210213
buffer.appendCodePoint(codePoint);
211214
} else {
212215
// Normalize '\r' and '\r\n' to '\n'.
@@ -231,6 +234,9 @@ private static <T> void decodeString(
231234
// For raw literals, all escapes are valid and those characters come through literally in
232235
// the string.
233236
buffer.appendCodePoint('\\');
237+
if (codePoint >= MIN_SURROGATE && codePoint <= MAX_SURROGATE) {
238+
throw new ParseException("Invalid unicode code point", seqOffset);
239+
}
234240
buffer.appendCodePoint(codePoint);
235241
continue;
236242
}

parser/src/test/java/dev/cel/parser/CelParserParameterizedTest.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -247,7 +247,12 @@ public void parser_errors() {
247247
runTest(PARSER, "1.exists(2, 3)");
248248
runTest(PARSER, "1 + +");
249249
runTest(PARSER, "\"\\xFh\"");
250+
runTest(PARSER, "\"\\uD800\"");
251+
runTest(PARSER, "\"\\uDFFF\"");
252+
runTest(PARSER, "\"\\U0000D800\"");
253+
runTest(PARSER, "\"\\U0000DFFF\"");
250254
runTest(PARSER, "\"\\a\\b\\f\\n\\r\\t\\v\\'\\\"\\\\\\? Illegal escape \\>\"");
255+
251256
runTest(PARSER, "as");
252257
runTest(PARSER, "break");
253258
runTest(PARSER, "const");

parser/src/test/resources/parser_errors.baseline

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,30 @@ ERROR: <input>:1:7: mismatched input '<EOF>' expecting {'[', '{', '(', '.', '-',
7373
| "\xFh"
7474
| ......^
7575

76+
I: "\uD800"
77+
=====>
78+
E: ERROR: <input>:1:1: Invalid unicode code point
79+
| "\uD800"
80+
| ^
81+
82+
I: "\uDFFF"
83+
=====>
84+
E: ERROR: <input>:1:1: Invalid unicode code point
85+
| "\uDFFF"
86+
| ^
87+
88+
I: "\U0000D800"
89+
=====>
90+
E: ERROR: <input>:1:1: Invalid unicode code point
91+
| "\U0000D800"
92+
| ^
93+
94+
I: "\U0000DFFF"
95+
=====>
96+
E: ERROR: <input>:1:1: Invalid unicode code point
97+
| "\U0000DFFF"
98+
| ^
99+
76100
I: "\a\b\f\n\r\t\v\'\"\\\? Illegal escape \>"
77101
=====>
78102
E: ERROR: <input>:1:1: token recognition error at: '"\a\b\f\n\r\t\v\'\"\\\? Illegal escape \>'
@@ -344,4 +368,4 @@ ERROR: <input>:1:6: unsupported syntax '`'
344368
| .....^
345369
ERROR: <input>:1:9: missing ')' at '<EOF>'
346370
| has(.`.`
347-
| ........^
371+
| ........^

0 commit comments

Comments
 (0)