Skip to content

Commit 3e8e318

Browse files
authored
[java] Fix JSON parser EOF sentinel collision with U+FFFF (#17737)
Input used `(char) -1` as its EOF sentinel. That value is 0xFFFF — a valid UTF-16 code unit (U+FFFF) that can legitimately appear in JSON. Any string containing it was misreported as an unterminated string. Switch `peek()`/`read()` to return `int` with -1 as the sentinel (matching `Reader.read()`) so the sentinel cannot collide with a valid UTF-16 code unit.
1 parent 0779019 commit 3e8e318

3 files changed

Lines changed: 46 additions & 22 deletions

File tree

java/src/org/openqa/selenium/json/Input.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,13 @@
2929
* read characters in the input buffer.
3030
*/
3131
class Input {
32-
/** end-of-file indicator (0xFFFD) */
33-
public static final char EOF = (char) -1; // NOTE: Produces Unicode replacement character (0xFFFD)
32+
/**
33+
* End-of-input sentinel returned by {@link #peek()} and {@link #read()}.
34+
*
35+
* <p>Value {@code -1} mirrors {@link java.io.Reader#read()} and — unlike a {@code char} sentinel
36+
* — cannot collide with any valid UTF-16 code unit (including U+FFFF).
37+
*/
38+
public static final int EOF = -1;
3439

3540
/** the number of chars to buffer */
3641
private static final int BUFFER_SIZE = 4096;
@@ -64,18 +69,20 @@ public Input(Reader source) {
6469
/**
6570
* Extract the next character from the input without consuming it.
6671
*
67-
* @return the next input character; {@link #EOF} if input is exhausted
72+
* @return the next input character as an unsigned UTF-16 code unit (0-65535); {@link #EOF} if
73+
* input is exhausted
6874
*/
69-
public char peek() {
75+
public int peek() {
7076
return fill() ? buffer[position + 1] : EOF;
7177
}
7278

7379
/**
7480
* Read and consume the next character from the input.
7581
*
76-
* @return the next input character; {@link #EOF} if input is exhausted
82+
* @return the next input character as an unsigned UTF-16 code unit (0-65535); {@link #EOF} if
83+
* input is exhausted
7784
*/
78-
public char read() {
85+
public int read() {
7986
return fill() ? buffer[++position] : EOF;
8087
}
8188

java/src/org/openqa/selenium/json/JsonInput.java

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ public JsonType peek() {
165165
return JsonType.END;
166166

167167
default:
168-
char c = input.read();
169-
throw new JsonException("Unable to determine type from: " + c + ". " + input);
168+
int c = input.read();
169+
throw new JsonException("Unable to determine type from: " + (char) c + ". " + input);
170170
}
171171
}
172172

@@ -194,10 +194,10 @@ public String nextName() {
194194

195195
String name = readString();
196196
skipWhitespace(input);
197-
char read = input.read();
197+
int read = input.read();
198198
if (read != ':') {
199199
throw new JsonException(
200-
"Unable to read name. Expected colon separator, but saw '" + read + "'");
200+
"Unable to read name. Expected colon separator, but saw '" + (char) read + "'");
201201
}
202202
return name;
203203
}
@@ -241,13 +241,13 @@ public Number nextNumber() {
241241
case '7':
242242
case '8':
243243
case '9':
244-
builder.append(input.read());
244+
builder.append((char) input.read());
245245
break;
246246
case '.':
247247
case 'e':
248248
case 'E':
249249
mightBeDecimal = true;
250-
builder.append(input.read());
250+
builder.append((char) input.read());
251251
break;
252252
default:
253253
read = false;
@@ -552,11 +552,11 @@ private void expect(JsonType type) {
552552

553553
int toCompareLength = toCompare.length();
554554
for (int i = 0; i < toCompareLength; i++) {
555-
char read = input.read();
555+
int read = input.read();
556556
if (read != toCompare.charAt(i)) {
557557
throw new JsonException(
558558
String.format(
559-
"Unable to read %s. Saw %s at position %d. %s", toCompare, read, i, input));
559+
"Unable to read %s. Saw %s at position %d. %s", toCompare, (char) read, i, input));
560560
}
561561
}
562562

@@ -574,9 +574,8 @@ private String readString() {
574574
input.read(); // Skip leading quote
575575

576576
StringBuilder builder = new StringBuilder();
577-
char c;
578577
while (true) {
579-
c = input.read();
578+
int c = input.read();
580579
switch (c) {
581580
case Input.EOF:
582581
throw new JsonException("Unterminated string: " + builder + ". " + input);
@@ -586,7 +585,7 @@ private String readString() {
586585
readEscape(builder);
587586
break;
588587
default:
589-
builder.append(c);
588+
builder.append((char) c);
590589
}
591590
}
592591
}
@@ -601,7 +600,7 @@ private String readString() {
601600
*/
602601
// FIXME: This function doesn't appear to support UTF-8 or UTF-32.
603602
private void readEscape(StringBuilder builder) {
604-
char read = input.read();
603+
int read = input.read();
605604

606605
// List from: https://tools.ietf.org/html/rfc7159.html#section-7
607606
switch (read) {
@@ -629,10 +628,10 @@ private void readEscape(StringBuilder builder) {
629628
int result = 0;
630629
int multiplier = 4096; // (16 * 16 * 16) as we start from the thousands and work to units.
631630
for (int i = 0; i < 4; i++) {
632-
char c = input.read();
631+
int c = input.read();
633632
int digit = Character.digit(c, 16);
634633
if (digit == -1) {
635-
throw new JsonException(c + " is not a hexadecimal digit. " + input);
634+
throw new JsonException((char) c + " is not a hexadecimal digit. " + input);
636635
}
637636
result += digit * multiplier;
638637
multiplier /= 16;
@@ -643,11 +642,11 @@ private void readEscape(StringBuilder builder) {
643642
case '/':
644643
case '\\':
645644
case '"':
646-
builder.append(read);
645+
builder.append((char) read);
647646
break;
648647

649648
default:
650-
throw new JsonException("Unexpected escape code: " + read + ". " + input);
649+
throw new JsonException("Unexpected escape code: " + (char) read + ". " + input);
651650
}
652651
}
653652

java/test/org/openqa/selenium/json/JsonInputTest.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,24 @@ void shouldBeAbleToReadNonWellFormedDataLongerThanReadBuffer() {
291291
}
292292
}
293293

294+
@Test
295+
void shouldReadU_FFFF_AsALiteralCharacterAndNotEndOfInput() {
296+
// U+FFFF is a valid Unicode code unit that historically collided with the in-band EOF
297+
// sentinel and was mis-reported as an unterminated string. Build the strings from
298+
// char values rather than embedding literal U+FFFF so the test is independent of the
299+
// source file's byte encoding.
300+
char nonChar = (char) 0xFFFF;
301+
String literalPayload = "a" + nonChar + "b";
302+
303+
try (JsonInput input = newInput("\"" + literalPayload + "\"")) {
304+
assertThat(input.nextString()).isEqualTo(literalPayload);
305+
}
306+
307+
try (JsonInput input = newInput("\"\\uFFFF\"")) {
308+
assertThat(input.nextString()).isEqualTo(String.valueOf(nonChar));
309+
}
310+
}
311+
294312
@Test
295313
void nullInputsShouldCoerceAsNullValues() throws IOException {
296314
try (InputStream is = new ByteArrayInputStream(new byte[0]);

0 commit comments

Comments
 (0)