Skip to content

Commit f566906

Browse files
committed
HTTPCLIENT-1843 Plug Commons-Compress into HttpClient’s automatic
content-decoding (optional) * New ContentDecoderRegistry discovers extra codecs (br, zstd, xz, lz4, …) via Commons-Compress when that jar is on the class-path; otherwise falls back to the built-ins (gzip, deflate) only. * No hard dependency added—projects that need the extra algorithms just add `commons-compress` (and helper jars like google-brotli, zstd-jni, xz-java) to their pom and HttpClient uses them automatically.
1 parent fd2870e commit f566906

11 files changed

Lines changed: 495 additions & 26 deletions

File tree

httpclient5-testing/src/test/java/org/apache/hc/client5/testing/sync/TestRedirects.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -646,8 +646,8 @@ public void handle(final ClassicHttpRequest request,
646646
Assertions.assertEquals(new URIBuilder().setHttpHost(target).setPath("/random/100").build(),
647647
reqWrapper.getUri());
648648

649-
assertThat(values.poll(), CoreMatchers.equalTo("gzip, x-gzip, deflate"));
650-
assertThat(values.poll(), CoreMatchers.equalTo("gzip, x-gzip, deflate"));
649+
assertThat(values.poll(), CoreMatchers.equalTo("gzip, deflate, lz4-framed, lz4-block, bzip2, pack200, deflate64, x-gzip"));
650+
assertThat(values.poll(), CoreMatchers.equalTo("gzip, deflate, lz4-framed, lz4-block, bzip2, pack200, deflate64, x-gzip"));
651651
assertThat(values.poll(), CoreMatchers.nullValue());
652652
}
653653

httpclient5/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@
108108
<artifactId>commons-io</artifactId>
109109
<scope>test</scope>
110110
</dependency>
111+
<dependency>
112+
<groupId>org.apache.commons</groupId>
113+
<artifactId>commons-compress</artifactId>
114+
<optional>true</optional>
115+
</dependency>
111116
</dependencies>
112117

113118
<build>

httpclient5/src/main/java/org/apache/hc/client5/http/entity/BrotliInputStreamFactory.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@
4141
@Contract(threading = ThreadingBehavior.STATELESS)
4242
public class BrotliInputStreamFactory implements InputStreamFactory {
4343

44+
/**
45+
* Canonical token for the Brotli content-coding.
46+
* @since 5.6
47+
*/
48+
public static final String ENCODING = "br";
49+
50+
@Override
51+
public String getContentEncoding() {
52+
return ENCODING;
53+
}
54+
4455
/**
4556
* Default instance of {@link BrotliInputStreamFactory}.
4657
*/

httpclient5/src/main/java/org/apache/hc/client5/http/entity/DeflateInputStreamFactory.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@
4141
@Contract(threading = ThreadingBehavior.STATELESS)
4242
public class DeflateInputStreamFactory implements InputStreamFactory {
4343

44+
/**
45+
* Canonical token for the deflate content-coding.
46+
* @since 5.6
47+
*/
48+
public static final String ENCODING = "deflate";
49+
50+
@Override
51+
public String getContentEncoding() {
52+
return ENCODING;
53+
}
54+
4455
/**
4556
* Default instance of {@link DeflateInputStreamFactory}.
4657
*/

httpclient5/src/main/java/org/apache/hc/client5/http/entity/GZIPInputStreamFactory.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,17 @@
4242
@Contract(threading = ThreadingBehavior.STATELESS)
4343
public class GZIPInputStreamFactory implements InputStreamFactory {
4444

45+
/**
46+
* Canonical token for the gzip content-coding.
47+
* @since 5.6
48+
*/
49+
public static final String ENCODING = "gzip";
50+
51+
@Override
52+
public String getContentEncoding() {
53+
return ENCODING;
54+
}
55+
4556
/**
4657
* Default instance of {@link GZIPInputStreamFactory}.
4758
*/

httpclient5/src/main/java/org/apache/hc/client5/http/entity/InputStreamFactory.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,21 @@ public interface InputStreamFactory {
3838

3939
InputStream create(InputStream inputStream) throws IOException;
4040

41+
/**
42+
* Returns the canonical {@code Content-Encoding} token handled by this
43+
* factory (for example {@code "gzip"}, {@code "deflate"}, {@code "br"}).
44+
* <p>
45+
* Implementations that do <strong>not</strong> represent an HTTP
46+
* content-decoder should simply inherit the default implementation,
47+
* which returns an empty string.
48+
*
49+
* @return the lower-case encoding token, or an empty string when the
50+
* factory is not intended for HTTP content-decoding
51+
*
52+
* @since 5.6
53+
*/
54+
default String getContentEncoding() {
55+
return "";
56+
}
57+
4158
}
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
* ====================================================================
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
* ====================================================================
20+
*
21+
* This software consists of voluntary contributions made by many
22+
* individuals on behalf of the Apache Software Foundation. For more
23+
* information on the Apache Software Foundation, please see
24+
* <http://www.apache.org/>.
25+
*
26+
*/
27+
28+
package org.apache.hc.client5.http.entity.compress;
29+
30+
import java.io.IOException;
31+
import java.io.InputStream;
32+
import java.util.Collections;
33+
import java.util.EnumMap;
34+
import java.util.Locale;
35+
import java.util.Map;
36+
37+
import org.apache.commons.compress.compressors.CompressorException;
38+
import org.apache.commons.compress.compressors.CompressorStreamFactory;
39+
import org.apache.hc.client5.http.entity.InputStreamFactory;
40+
import org.apache.hc.core5.annotation.Contract;
41+
import org.apache.hc.core5.annotation.Internal;
42+
import org.apache.hc.core5.annotation.ThreadingBehavior;
43+
44+
/**
45+
* A factory for creating InputStream instances, utilizing Apache Commons Compress.
46+
* This class is compiled with Commons Compress as an optional dependency, loading
47+
* only when the library is present at runtime, avoiding mandatory inclusion in
48+
* downstream builds.
49+
* <p>
50+
* <p>
51+
* Some encodings require native helper JARs; runtime availability is checked
52+
* using a lightweight Class.forName probe to register codecs only when helpers
53+
* are present.
54+
*
55+
* @since 5.6
56+
*/
57+
@Internal
58+
@Contract(threading = ThreadingBehavior.STATELESS)
59+
final class CommonsCompressDecoderFactory implements InputStreamFactory {
60+
61+
62+
/**
63+
* Map of codings that need extra JARs → the fully‐qualified class we test for
64+
*/
65+
private static final Map<ContentCoding, String> REQUIRED_CLASS_NAME;
66+
67+
static {
68+
final Map<ContentCoding, String> m = new EnumMap<>(ContentCoding.class);
69+
m.put(ContentCoding.BROTLI, "org.brotli.dec.BrotliInputStream");
70+
m.put(ContentCoding.ZSTD, "com.github.luben.zstd.ZstdInputStream");
71+
m.put(ContentCoding.XZ, "org.tukaani.xz.XZInputStream");
72+
m.put(ContentCoding.LZMA, "org.tukaani.xz.XZInputStream");
73+
REQUIRED_CLASS_NAME = Collections.unmodifiableMap(m);
74+
}
75+
76+
private final String encoding;
77+
78+
CommonsCompressDecoderFactory(final String encoding) {
79+
this.encoding = encoding.toLowerCase(Locale.ROOT);
80+
}
81+
82+
@Override
83+
public String getContentEncoding() {
84+
return encoding;
85+
}
86+
87+
@Override
88+
public InputStream create(final InputStream source) throws IOException {
89+
try {
90+
return new CompressorStreamFactory()
91+
.createCompressorInputStream(encoding, source);
92+
} catch (final CompressorException | LinkageError ex) {
93+
throw new IOException(
94+
"Unable to decode Content-Encoding '" + encoding + '\'', ex);
95+
}
96+
}
97+
98+
99+
static boolean runtimeAvailable(final String token) {
100+
final ContentCoding coding = ContentCoding.fromToken(token);
101+
if (coding == null) {
102+
return true;
103+
}
104+
final String helper = REQUIRED_CLASS_NAME.get(coding);
105+
if (helper == null) {
106+
// no extra JAR needed
107+
return true;
108+
}
109+
try {
110+
Class.forName(helper, false,
111+
CommonsCompressDecoderFactory.class.getClassLoader());
112+
return true;
113+
} catch (final ClassNotFoundException | LinkageError ex) {
114+
return false;
115+
}
116+
}
117+
}
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/*
2+
* ====================================================================
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
* ====================================================================
20+
*
21+
* This software consists of voluntary contributions made by many
22+
* individuals on behalf of the Apache Software Foundation. For more
23+
* information on the Apache Software Foundation, please see
24+
* <http://www.apache.org/>.
25+
*
26+
*/
27+
28+
package org.apache.hc.client5.http.entity.compress;
29+
30+
import java.util.Collections;
31+
import java.util.HashMap;
32+
import java.util.Locale;
33+
import java.util.Map;
34+
35+
/**
 * Enumeration of the content-coding tokens known to HttpClient for HTTP
 * request and response bodies.
 * <p>
 * Every constant carries the lower-case token as it appears in the
 * {@code Content-Encoding} and {@code Accept-Encoding} headers. Some codings
 * (e.g. Brotli, Zstandard, XZ/LZMA) may require additional helper libraries
 * at runtime.
 *
 * @since 5.6
 */
public enum ContentCoding {

    /** GZIP compression format. */
    GZIP("gzip"),

    /** "deflate" compression format (zlib or raw). */
    DEFLATE("deflate"),

    /** Legacy alias for GZIP. */
    X_GZIP("x-gzip"),

    // Optional codecs requiring Commons-Compress or native helpers

    /** Brotli compression format. */
    BROTLI("br"),

    /** Zstandard compression format. */
    ZSTD("zstd"),

    /** XZ compression format. */
    XZ("xz"),

    /** LZMA compression format. */
    LZMA("lzma"),

    /** Framed LZ4 compression format. */
    LZ4_FRAMED("lz4-framed"),

    /** Block LZ4 compression format. */
    LZ4_BLOCK("lz4-block"),

    /** BZIP2 compression format. */
    BZIP2("bzip2"),

    /** Pack200 compression format. */
    PACK200("pack200"),

    /** Deflate64 compression format. */
    DEFLATE64("deflate64");

    /** Read-only index from token to constant, built once at class initialization. */
    private static final Map<String, ContentCoding> TOKEN_LOOKUP = indexByToken();

    private final String token;

    ContentCoding(final String token) {
        this.token = token;
    }

    /** Builds the token index; sized so that no rehash happens while filling it. */
    private static Map<String, ContentCoding> indexByToken() {
        final ContentCoding[] all = values();
        final Map<String, ContentCoding> index = new HashMap<>(all.length, 1f);
        for (final ContentCoding coding : all) {
            index.put(coding.token, coding);
        }
        return Collections.unmodifiableMap(index);
    }

    /**
     * Returns the token string for this content-coding.
     *
     * @return the lowercase token used in HTTP headers
     */
    public String token() {
        return token;
    }

    /**
     * Looks up a constant by its token, ignoring case.
     * <p>
     * The token is lower-cased with {@link Locale#ROOT} and resolved through
     * a pre-populated map; it is not trimmed, so surrounding whitespace
     * prevents a match.
     *
     * @param token the content-coding token to look up, may be {@code null}
     * @return the matching enum constant, or {@code null} if none matches or
     *         {@code token} is {@code null}
     */
    public static ContentCoding fromToken(final String token) {
        if (token == null) {
            return null;
        }
        final String normalized = token.toLowerCase(Locale.ROOT);
        return TOKEN_LOOKUP.get(normalized);
    }
}

0 commit comments

Comments
 (0)