diff --git a/java/src/org/openqa/selenium/json/Input.java b/java/src/org/openqa/selenium/json/Input.java index d1f639abb3c64..9a282811ca8ae 100644 --- a/java/src/org/openqa/selenium/json/Input.java +++ b/java/src/org/openqa/selenium/json/Input.java @@ -29,8 +29,13 @@ * read characters in the input buffer. */ class Input { - /** end-of-file indicator (0xFFFD) */ - public static final char EOF = (char) -1; // NOTE: Produces Unicode replacement character (0xFFFD) + /** + * End-of-input sentinel returned by {@link #peek()} and {@link #read()}. + * + *

<p>Value {@code -1} mirrors {@link java.io.Reader#read()} and — unlike a {@code char} sentinel + * — cannot collide with any valid UTF-16 code unit (including U+FFFF). + */ + public static final int EOF = -1; /** the number of chars to buffer */ private static final int BUFFER_SIZE = 4096; @@ -64,18 +69,20 @@ public Input(Reader source) { /** * Extract the next character from the input without consuming it. * - * @return the next input character; {@link #EOF} if input is exhausted + * @return the next input character as an unsigned UTF-16 code unit (0-65535); {@link #EOF} if + * input is exhausted */ - public char peek() { + public int peek() { return fill() ? buffer[position + 1] : EOF; } /** * Read and consume the next character from the input. * - * @return the next input character; {@link #EOF} if input is exhausted + * @return the next input character as an unsigned UTF-16 code unit (0-65535); {@link #EOF} if + * input is exhausted */ - public char read() { + public int read() { return fill() ? buffer[++position] : EOF; } diff --git a/java/src/org/openqa/selenium/json/JsonInput.java b/java/src/org/openqa/selenium/json/JsonInput.java index 99c79373db482..4f4220f78fe0c 100644 --- a/java/src/org/openqa/selenium/json/JsonInput.java +++ b/java/src/org/openqa/selenium/json/JsonInput.java @@ -165,8 +165,8 @@ public JsonType peek() { return JsonType.END; default: - char c = input.read(); - throw new JsonException("Unable to determine type from: " + c + ". " + input); + int c = input.read(); + throw new JsonException("Unable to determine type from: " + (char) c + ". " + input); } } @@ -194,10 +194,10 @@ public String nextName() { String name = readString(); skipWhitespace(input); - char read = input.read(); + int read = input.read(); if (read != ':') { throw new JsonException( - "Unable to read name. Expected colon separator, but saw '" + read + "'"); + "Unable to read name. 
Expected colon separator, but saw '" + (char) read + "'"); } return name; } @@ -241,13 +241,13 @@ public Number nextNumber() { case '7': case '8': case '9': - builder.append(input.read()); + builder.append((char) input.read()); break; case '.': case 'e': case 'E': mightBeDecimal = true; - builder.append(input.read()); + builder.append((char) input.read()); break; default: read = false; @@ -552,11 +552,11 @@ private void expect(JsonType type) { int toCompareLength = toCompare.length(); for (int i = 0; i < toCompareLength; i++) { - char read = input.read(); + int read = input.read(); if (read != toCompare.charAt(i)) { throw new JsonException( String.format( - "Unable to read %s. Saw %s at position %d. %s", toCompare, read, i, input)); + "Unable to read %s. Saw %s at position %d. %s", toCompare, (char) read, i, input)); } } @@ -574,9 +574,8 @@ private String readString() { input.read(); // Skip leading quote StringBuilder builder = new StringBuilder(); - char c; while (true) { - c = input.read(); + int c = input.read(); switch (c) { case Input.EOF: throw new JsonException("Unterminated string: " + builder + ". " + input); @@ -586,7 +585,7 @@ private String readString() { readEscape(builder); break; default: - builder.append(c); + builder.append((char) c); } } } @@ -601,7 +600,7 @@ private String readString() { */ // FIXME: This function doesn't appear to support UTF-8 or UTF-32. private void readEscape(StringBuilder builder) { - char read = input.read(); + int read = input.read(); // List from: https://tools.ietf.org/html/rfc7159.html#section-7 switch (read) { @@ -629,10 +628,10 @@ private void readEscape(StringBuilder builder) { int result = 0; int multiplier = 4096; // (16 * 16 * 16) as we start from the thousands and work to units. for (int i = 0; i < 4; i++) { - char c = input.read(); + int c = input.read(); int digit = Character.digit(c, 16); if (digit == -1) { - throw new JsonException(c + " is not a hexadecimal digit. 
" + input); + throw new JsonException((char) c + " is not a hexadecimal digit. " + input); } result += digit * multiplier; multiplier /= 16; @@ -643,11 +642,11 @@ private void readEscape(StringBuilder builder) { case '/': case '\\': case '"': - builder.append(read); + builder.append((char) read); break; default: - throw new JsonException("Unexpected escape code: " + read + ". " + input); + throw new JsonException("Unexpected escape code: " + (char) read + ". " + input); } } diff --git a/java/test/org/openqa/selenium/json/JsonInputTest.java b/java/test/org/openqa/selenium/json/JsonInputTest.java index 289a5f8bbf4d9..73245c604e28a 100644 --- a/java/test/org/openqa/selenium/json/JsonInputTest.java +++ b/java/test/org/openqa/selenium/json/JsonInputTest.java @@ -291,6 +291,24 @@ void shouldBeAbleToReadNonWellFormedDataLongerThanReadBuffer() { } } + @Test + void shouldReadU_FFFF_AsALiteralCharacterAndNotEndOfInput() { + // U+FFFF is a valid Unicode code unit that historically collided with the in-band EOF + // sentinel and was mis-reported as an unterminated string. Build the strings from + // char values rather than embedding literal U+FFFF so the test is independent of the + // source file's byte encoding. + char nonChar = (char) 0xFFFF; + String literalPayload = "a" + nonChar + "b"; + + try (JsonInput input = newInput("\"" + literalPayload + "\"")) { + assertThat(input.nextString()).isEqualTo(literalPayload); + } + + try (JsonInput input = newInput("\"\\uFFFF\"")) { + assertThat(input.nextString()).isEqualTo(String.valueOf(nonChar)); + } + } + @Test void nullInputsShouldCoerceAsNullValues() throws IOException { try (InputStream is = new ByteArrayInputStream(new byte[0]);