diff --git a/java/util/src/main/java/com/google/protobuf/util/JsonFormat.java b/java/util/src/main/java/com/google/protobuf/util/JsonFormat.java index bab5cc0bf07e9..c5fc0d3f0d4b8 100644 --- a/java/util/src/main/java/com/google/protobuf/util/JsonFormat.java +++ b/java/util/src/main/java/com/google/protobuf/util/JsonFormat.java @@ -1423,41 +1423,78 @@ private void printSingleFieldValue( } } + private static final String[] replacementChars; + + static { + replacementChars = new String[128]; + for (int i = 0; i <= 0x1f; i++) { + replacementChars[i] = String.format("\\u%04x", i); + } + replacementChars['"'] = "\\\""; + replacementChars['\\'] = "\\\\"; + replacementChars['\t'] = "\\t"; + replacementChars['\b'] = "\\b"; + replacementChars['\n'] = "\\n"; + replacementChars['\r'] = "\\r"; + replacementChars['\f'] = "\\f"; + + // These characters are fully legal in JSON, but are escaped to prevent XSS risks. Notably + // this topic is not like 'html escaping' where these would be replaced with something like + // `<`. The escaped or not ways of writing it are verbatim 2 exactly equivalent + // ways to represent the same exact value in JSON: consumers should never do any manual + // unescape to round trip the intended value. This replacement is only be semantically + // observable if someone tries to handle it raw textually and not as JSON. + for (int i : "<>&='".toCharArray()) { + replacementChars[i] = String.format("\\u%04x", i); + } + } + /** - * Prints a string value wrapped in double quotes, escaping any illegal or dangerous characters - * for JSON safety. + * If a character needs to be escaped, returns the replacement string. Otherwise, returns null. */ - private void printStringEscapedAndQuoted(final CharSequence value) throws IOException { - // gson.toJson() is expensive: only use it if the string isn't entirely safe to print - // directly. - if (isJsonSafeString(value)) { - generator.print("\""); - generator.print(value); - generator.print("\""); - } else { - generator.print(gson.toJson(value.toString())); + private static String getReplacementOrNull(char c) { + if (c < 128) { + return replacementChars[c]; + } + + // \u2028 and \u2029 are paragraph whitespace characters that had mismatch between the + // JSON spec and the browser JavaScript object literal spec. It is extremely unlikely + // this topic matters now, but for parity with GSON we escape them. + if (c == '\u2028') { + return "\\u2028"; + } + if (c == '\u2029') { + return "\\u2029"; } + + return null; } - private static boolean isJsonSafeString(CharSequence value) { + /** + * Prints a string value wrapped in double quotes, escaping any illegal or dangerous characters + * for JSON safety. + */ + private void printStringEscapedAndQuoted(final CharSequence value) throws IOException { + generator.print("\""); int len = value.length(); + int last = 0; for (int i = 0; i < len; i++) { char c = value.charAt(i); - // Bare characters, fully disallowed in JSON strings and which must be escaped. - if (c < 0x20 || c == '"' || c == '\\') { - return false; - } - // HTML-sensitive characters. These are allowed in JSON, but escaped to mitigate - // XSS risks when the JSON is rendered in HTML. - if (c == '<' || c == '>' || c == '&' || c == '=' || c == '\'') { - return false; + String replacement = getReplacementOrNull(c); + + // Keeps scanning to only call print() once on long runs that don't need escaping. + if (replacement == null) { + continue; } - // Non-ASCII characters are mostly safe, but we'll leave that to gson to decide. - if (c >= 127) { - return false; + if (last < i) { + generator.print(value.subSequence(last, i)); } + generator.print(replacement); + last = i + 1; } - return true; + + generator.print(value.subSequence(last, len)); + generator.print("\""); } } diff --git a/java/util/src/test/java/com/google/protobuf/util/JsonFormatTest.java b/java/util/src/test/java/com/google/protobuf/util/JsonFormatTest.java index d42e30fad89e3..e40836e3b33f4 100644 --- a/java/util/src/test/java/com/google/protobuf/util/JsonFormatTest.java +++ b/java/util/src/test/java/com/google/protobuf/util/JsonFormatTest.java @@ -13,6 +13,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.google.gson.Gson; import com.google.gson.JsonSyntaxException; import com.google.protobuf.Any; import com.google.protobuf.BoolValue; @@ -1950,6 +1951,38 @@ public void testStringEscapingWithControlCharacters() throws Exception { assertThat(builder.getOptionalString()).isEqualTo(complexString); } + @Test + public void testStringEscapingAgainstGsonParity_allLowChars() throws Exception { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i <= 256; i++) { + sb.append((char) i); + } + String allChars = sb.toString(); + TestAllTypes message = TestAllTypes.newBuilder().setOptionalString(allChars).build(); + String gsonEscaped = new Gson().toJson(allChars); + + String expectedJson = "{\n \"optionalString\": " + gsonEscaped + "\n}"; + assertThat(toJsonString(message)).isEqualTo(expectedJson); + + TestAllTypes.Builder builder = TestAllTypes.newBuilder(); + JsonFormat.parser().merge(toJsonString(message), builder); + assertThat(builder.getOptionalString()).isEqualTo(allChars); + } + + @Test + public void testStringEscapingAgainstGsonParity_danglingSurrogate() throws Exception { + String danglingSurrogate = "foo \uD800 bar"; + TestAllTypes message = TestAllTypes.newBuilder().setOptionalString(danglingSurrogate).build(); + String gsonEscaped = new Gson().toJson(danglingSurrogate); + + String expectedJson = "{\n \"optionalString\": " + gsonEscaped + "\n}"; + assertThat(toJsonString(message)).isEqualTo(expectedJson); + + TestAllTypes.Builder builder = TestAllTypes.newBuilder(); + JsonFormat.parser().merge(toJsonString(message), builder); + assertThat(builder.getOptionalString()).isEqualTo(danglingSurrogate); + } + @Test public void testDefaultValueOptionsProto3() throws Exception { TestAllTypes message = TestAllTypes.getDefaultInstance();