Skip to content

Commit 7d73658

Browse files
l46kok authored and copybara-github committed
Reject invalid unicode literals in the parser
PiperOrigin-RevId: 894137619
1 parent 46bae72 commit 7d73658

File tree

3 files changed

+28
-0
lines changed

3 files changed

+28
-0
lines changed

common/src/main/java/dev/cel/common/internal/Constants.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,9 @@ private static <T> void decodeString(
207207
continue;
208208
}
209209
skipNewline = false;
210+
if (codePoint >= MIN_SURROGATE && codePoint <= MAX_SURROGATE) {
211+
throw new ParseException("Invalid unicode code point", seqOffset);
212+
}
210213
buffer.appendCodePoint(codePoint);
211214
} else {
212215
// Normalize '\r' and '\r\n' to '\n'.
@@ -231,6 +234,9 @@ private static <T> void decodeString(
231234
// For raw literals, all escapes are valid and those characters come through literally in
232235
// the string.
233236
buffer.appendCodePoint('\\');
237+
if (codePoint >= MIN_SURROGATE && codePoint <= MAX_SURROGATE) {
238+
throw new ParseException("Invalid unicode code point", seqOffset);
239+
}
234240
buffer.appendCodePoint(codePoint);
235241
continue;
236242
}

parser/src/test/java/dev/cel/parser/CelParserParameterizedTest.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,10 @@ public void parser_errors() {
248248
runTest(PARSER, "1 + +");
249249
runTest(PARSER, "\"\\xFh\"");
250250
runTest(PARSER, "\"\\a\\b\\f\\n\\r\\t\\v\\'\\\"\\\\\\? Illegal escape \\>\"");
251+
runTest(PARSER, "'\uD800'");
252+
runTest(PARSER, "'\uDFFF'");
253+
runTest(PARSER, "r\"\\\uD800\"");
254+
251255
runTest(PARSER, "as");
252256
runTest(PARSER, "break");
253257
runTest(PARSER, "const");

parser/src/test/resources/parser_errors.baseline

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,24 @@ ERROR: <input>:1:43: mismatched input '<EOF>' expecting {'[', '{', '(', '.', '-'
8585
| "\a\b\f\n\r\t\v\'\"\\\? Illegal escape \>"
8686
| ..........................................^
8787

88+
I: '?'
89+
=====>
90+
E: ERROR: <input>:1:1: Invalid unicode code point
91+
| '?'
92+
| ^
93+
94+
I: '?'
95+
=====>
96+
E: ERROR: <input>:1:1: Invalid unicode code point
97+
| '?'
98+
| ^
99+
100+
I: r"\?"
101+
=====>
102+
E: ERROR: <input>:1:1: Invalid unicode code point
103+
| r"\?"
104+
| ^
105+
88106
I: as
89107
=====>
90108
E: ERROR: <input>:1:1: reserved identifier: as

0 commit comments

Comments
 (0)