Skip to content

Commit d8798b6

Browse files
committed
HTTPCLIENT-2418 - Fix default charset handling in SimpleBody for JSON content. Use UTF-8 for JSON media types when no charset parameter is present.
1 parent 726eac2 commit d8798b6

2 files changed

Lines changed: 99 additions & 6 deletions

File tree

httpclient5/src/main/java/org/apache/hc/client5/http/async/methods/SimpleBody.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,31 @@ public final class SimpleBody {
5050
this.contentType = contentType;
5151
}
5252

53+
private static Charset getCharset(final ContentType contentType) {
54+
if (contentType != null) {
55+
final Charset charset = contentType.getCharset();
56+
if (charset != null) {
57+
return charset;
58+
}
59+
final String mimeType = contentType.getMimeType();
60+
if (contentType.isSameMimeType(ContentType.APPLICATION_JSON)
61+
|| contentType.isSameMimeType(ContentType.APPLICATION_NDJSON)
62+
|| contentType.isSameMimeType(ContentType.APPLICATION_PROBLEM_JSON)
63+
|| mimeType != null && mimeType.length() >= 5
64+
&& mimeType.regionMatches(true, mimeType.length() - 5, "+json", 0, 5)) {
65+
return StandardCharsets.UTF_8;
66+
}
67+
return StandardCharsets.US_ASCII;
68+
}
69+
return ContentType.DEFAULT_TEXT.getCharset();
70+
}
71+
5372
/**
 * Creates a body from text.
 * <p>
 * Text longer than 2048 characters is handed to the constructor as text,
 * with {@code null} in place of the byte array; shorter text is encoded to
 * bytes immediately.
 * <p>
 * This span is a diff rendering: lines following a marker ending in
 * {@code -} are the pre-commit code, lines following a marker ending in
 * {@code +} are their replacement.
 *
 * @param body the text content, must not be {@code null}.
 * @param contentType the content type, may be {@code null}.
 */
static SimpleBody create(final String body, final ContentType contentType) {
5473
Args.notNull(body, "Body");
5574
// Long text: skip the eager encoding and pass the text through unchanged.
if (body.length() > 2048) {
5675
return new SimpleBody(null, body, contentType);
5776
}
58-
// Removed by this commit: inline charset lookup with a US-ASCII fallback.
final Charset charset = (contentType != null ? contentType : ContentType.DEFAULT_TEXT).getCharset();
59-
final byte[] bytes = body.getBytes(charset != null ? charset : StandardCharsets.US_ASCII);
77+
// Added by this commit: charset selection is delegated to getCharset(contentType).
final byte[] bytes = body.getBytes(getCharset(contentType));
6078
return new SimpleBody(bytes, null, contentType);
6179
}
6280

@@ -83,8 +101,7 @@ public byte[] getBodyBytes() {
83101
if (bodyAsBytes != null) {
84102
return bodyAsBytes;
85103
} else if (bodyAsText != null) {
86-
final Charset charset = (contentType != null ? contentType : ContentType.DEFAULT_TEXT).getCharset();
87-
return bodyAsText.getBytes(charset != null ? charset : StandardCharsets.US_ASCII);
104+
return bodyAsText.getBytes(getCharset(contentType));
88105
} else {
89106
return null;
90107
}
@@ -97,8 +114,7 @@ public byte[] getBodyBytes() {
97114
*/
98115
public String getBodyText() {
99116
// Body is held as bytes: decode it on demand.
if (bodyAsBytes != null) {
100-
// Removed by this commit: inline charset lookup with a US-ASCII fallback.
final Charset charset = (contentType != null ? contentType : ContentType.DEFAULT_TEXT).getCharset();
101-
return new String(bodyAsBytes, charset != null ? charset : StandardCharsets.US_ASCII);
117+
// Added by this commit: charset selection is delegated to getCharset(contentType).
return new String(bodyAsBytes, getCharset(contentType));
102118
}
103119
// No byte form available: return the text field as-is.
return bodyAsText;
104120
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* ====================================================================
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
* ====================================================================
20+
*
21+
* This software consists of voluntary contributions made by many
22+
* individuals on behalf of the Apache Software Foundation. For more
23+
* information on the Apache Software Foundation, please see
24+
* <http://www.apache.org/>.
25+
*
26+
*/
27+
package org.apache.hc.client5.http.async.methods;
28+
29+
import java.nio.charset.StandardCharsets;
30+
31+
import org.apache.hc.core5.http.ContentType;
32+
import org.junit.jupiter.api.Assertions;
33+
import org.junit.jupiter.api.Test;
34+
35+
class TestSimpleBody {
36+
37+
@Test
38+
void testGetBodyTextUsesUtf8ForJsonWithoutCharsetParameter() {
39+
final String message = "{\"msg\": \"Test emoji 👋\"}";
40+
final SimpleBody body = SimpleBody.create(
41+
message.getBytes(StandardCharsets.UTF_8),
42+
ContentType.parse("application/json"));
43+
44+
Assertions.assertEquals(message, body.getBodyText());
45+
}
46+
47+
@Test
48+
void testGetBodyBytesUsesUtf8ForJsonWithoutCharsetParameter() {
49+
final String message = "{\"msg\": \"Test emoji 👋\"}";
50+
final SimpleBody body = SimpleBody.create(
51+
message,
52+
ContentType.parse("application/json"));
53+
54+
Assertions.assertArrayEquals(message.getBytes(StandardCharsets.UTF_8), body.getBodyBytes());
55+
}
56+
57+
@Test
58+
void testGetBodyTextUsesUtf8ForProblemJsonWithoutCharsetParameter() {
59+
final String message = "{\"title\": \"Bad request 👋\"}";
60+
final SimpleBody body = SimpleBody.create(
61+
message.getBytes(StandardCharsets.UTF_8),
62+
ContentType.parse("application/problem+json"));
63+
64+
Assertions.assertEquals(message, body.getBodyText());
65+
}
66+
67+
@Test
68+
void testExplicitCharsetStillWins() {
69+
final String message = "{\"msg\": \"hi\"}";
70+
final byte[] utf16 = message.getBytes(StandardCharsets.UTF_16);
71+
final SimpleBody body = SimpleBody.create(
72+
utf16,
73+
ContentType.parse("application/json; charset=UTF-16"));
74+
75+
Assertions.assertEquals(message, body.getBodyText());
76+
}
77+
}

0 commit comments

Comments
 (0)