Skip to content

Commit 84166ba

Browse files
nddipiazza, elemdisc, and Copilot
authored
TIKA-4679: Add HTTP/2 support to tika-server via Jetty http2-server (#2672)
* Adding jetty http2 dependency allows cxf to support http2 requests.

* TIKA-4679: Add e2e test module for HTTP/2 tika-server
  - Add tika-e2e-tests/tika-server module with TikaServerHttp2Test
  - Test starts the real fat-jar and verifies HTTP/2 (h2c) responses via Java HttpClient configured with Version.HTTP_2
  - Wire module into tika-e2e-tests/pom.xml modules list
  - Module is skipped by default; enable with -Pe2e profile
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* TIKA-4679: Fix e2e test - skip when fat-jar absent, use /status health-check
  - Add Assumptions.assumeTrue(jar.exists()) so tests skip gracefully when tika-server-standard fat-jar hasn't been built (CI without prior install)
  - Change startup health-check from / to /status (more reliable 200 response)
  - Increase startup timeout to 90s for slower CI environments
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* TIKA-4679: Fix e2e HTTP/2 test server startup and health-check
  - Use tika-server-standard assembly zip (unpacked via dependency plugin) instead of thin jar, so the required lib/ dependencies are available
  - Health-check endpoint changed from /status to / (root always returns 200; /status requires explicit endpoint config to be enabled)
  - Pre-negotiate h2c before PUT /tika parse test: h2c Upgrade requires a no-body request first; GET / establishes the HTTP/2 connection so the subsequent PUT reuses it correctly
  - Drop --noFork flag (TikaServerCli does not recognize it; server runs its own fork management independently)
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* TIKA-4679: Address Copilot review comments on TikaServerHttp2Test
  - Remove unused moduleDir variable; initialize repoRoot directly
  - stopServer() now uses waitFor(5s) + destroyForcibly() + waitFor(30s) to avoid indefinite blocking if SIGTERM doesn't terminate the process
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Lawrence Moorehead <lawrence.moorehead@elemdiscovery.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent a7e6ac7 commit 84166ba

6 files changed

Lines changed: 389 additions & 1 deletion

File tree

tika-e2e-tests/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959

6060
<modules>
6161
<module>tika-grpc</module>
62+
<module>tika-server</module>
6263
</modules>
6364

6465
<dependencyManagement>

tika-e2e-tests/tika-server/pom.xml

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<!--
4+
Licensed to the Apache Software Foundation (ASF) under one
5+
or more contributor license agreements. See the NOTICE file
6+
distributed with this work for additional information
7+
regarding copyright ownership. The ASF licenses this file
8+
to you under the Apache License, Version 2.0 (the
9+
"License"); you may not use this file except in compliance
10+
with the License. You may obtain a copy of the License at
11+
12+
http://www.apache.org/licenses/LICENSE-2.0
13+
14+
Unless required by applicable law or agreed to in writing,
15+
software distributed under the License is distributed on an
16+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17+
KIND, either express or implied. See the License for the
18+
specific language governing permissions and limitations
19+
under the License.
20+
-->
21+
22+
<project xmlns="http://maven.apache.org/POM/4.0.0"
23+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
24+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
25+
<modelVersion>4.0.0</modelVersion>
26+
27+
<parent>
28+
<groupId>org.apache.tika</groupId>
29+
<artifactId>tika-e2e-tests</artifactId>
30+
<version>${revision}</version>
31+
<relativePath>../pom.xml</relativePath>
32+
</parent>
33+
34+
<artifactId>tika-e2e-tests-server</artifactId>
35+
<packaging>jar</packaging>
36+
<name>Apache Tika E2E Tests: REST Server</name>
37+
<description>End-to-end tests for tika-server-standard, including HTTP/2 support verification</description>
38+
39+
<properties>
40+
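<!-- Path to the tika-server-standard binary assembly zip in that module's target directory. NOTE(review): not referenced elsewhere in this POM; the e2e profile unpacks the zip by artifact coordinates instead - confirm whether this property is still needed. -->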
41+
<tika.server.zip>${project.basedir}/../../tika-server/tika-server-standard/target/tika-server-standard-${revision}-bin.zip</tika.server.zip>
42+
<!-- Directory where the assembly is unpacked before tests run -->
43+
<tika.server.home>${project.build.directory}/tika-server-dist</tika.server.home>
44+
</properties>
45+
46+
<dependencies>
47+
<dependency>
48+
<groupId>org.junit.jupiter</groupId>
49+
<artifactId>junit-jupiter-api</artifactId>
50+
<scope>test</scope>
51+
</dependency>
52+
<dependency>
53+
<groupId>org.junit.jupiter</groupId>
54+
<artifactId>junit-jupiter-engine</artifactId>
55+
<scope>test</scope>
56+
</dependency>
57+
<dependency>
58+
<groupId>org.slf4j</groupId>
59+
<artifactId>slf4j-api</artifactId>
60+
</dependency>
61+
<dependency>
62+
<groupId>org.apache.logging.log4j</groupId>
63+
<artifactId>log4j-core</artifactId>
64+
</dependency>
65+
<dependency>
66+
<groupId>org.apache.logging.log4j</groupId>
67+
<artifactId>log4j-slf4j2-impl</artifactId>
68+
</dependency>
69+
</dependencies>
70+
71+
<build>
72+
<plugins>
73+
<plugin>
74+
<groupId>org.apache.maven.plugins</groupId>
75+
<artifactId>maven-surefire-plugin</artifactId>
76+
<configuration>
77+
<!-- Skip by default; run with -Pe2e -->
78+
<skipTests>true</skipTests>
79+
</configuration>
80+
</plugin>
81+
<plugin>
82+
<groupId>org.apache.rat</groupId>
83+
<artifactId>apache-rat-plugin</artifactId>
84+
<configuration>
85+
<inputExcludes>
86+
<inputExclude>**/README*.md</inputExclude>
87+
<inputExclude>src/test/resources/**</inputExclude>
88+
</inputExcludes>
89+
</configuration>
90+
</plugin>
91+
</plugins>
92+
</build>
93+
94+
<profiles>
95+
<profile>
96+
<id>e2e</id>
97+
<build>
98+
<plugins>
99+
<plugin>
100+
<groupId>org.apache.maven.plugins</groupId>
101+
<artifactId>maven-dependency-plugin</artifactId>
102+
<executions>
103+
<execution>
104+
<id>unpack-tika-server</id>
105+
<phase>process-test-resources</phase>
106+
<goals>
107+
<goal>unpack</goal>
108+
</goals>
109+
<configuration>
110+
<artifactItems>
111+
<artifactItem>
112+
<groupId>org.apache.tika</groupId>
113+
<artifactId>tika-server-standard</artifactId>
114+
<version>${revision}</version>
115+
<classifier>bin</classifier>
116+
<type>zip</type>
117+
<overWrite>false</overWrite>
118+
<outputDirectory>${tika.server.home}</outputDirectory>
119+
</artifactItem>
120+
</artifactItems>
121+
</configuration>
122+
</execution>
123+
</executions>
124+
</plugin>
125+
<plugin>
126+
<groupId>org.apache.maven.plugins</groupId>
127+
<artifactId>maven-surefire-plugin</artifactId>
128+
<configuration>
129+
<skipTests>false</skipTests>
130+
<systemPropertyVariables>
131+
<tika.server.home>${tika.server.home}</tika.server.home>
132+
</systemPropertyVariables>
133+
</configuration>
134+
</plugin>
135+
</plugins>
136+
</build>
137+
</profile>
138+
</profiles>
139+
</project>
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.tika.server.e2e;
18+
19+
import static java.nio.charset.StandardCharsets.UTF_8;
20+
import static org.junit.jupiter.api.Assertions.assertEquals;
21+
22+
import java.io.BufferedReader;
23+
import java.io.InputStreamReader;
24+
import java.net.ServerSocket;
25+
import java.net.URI;
26+
import java.net.http.HttpClient;
27+
import java.net.http.HttpRequest;
28+
import java.net.http.HttpResponse;
29+
import java.nio.file.Files;
30+
import java.nio.file.Path;
31+
import java.nio.file.Paths;
32+
import java.time.Duration;
33+
import java.time.Instant;
34+
35+
import org.junit.jupiter.api.AfterEach;
36+
import org.junit.jupiter.api.Assumptions;
37+
import org.junit.jupiter.api.BeforeEach;
38+
import org.junit.jupiter.api.Tag;
39+
import org.junit.jupiter.api.Test;
40+
import org.slf4j.Logger;
41+
import org.slf4j.LoggerFactory;
42+
43+
/**
44+
* End-to-end test verifying that tika-server-standard supports HTTP/2 (h2c cleartext).
45+
*
46+
* Starts tika-server.jar from the unpacked tika-server-standard assembly, sends a request using Java's HttpClient configured for HTTP/2,
47+
* and asserts the response was served over HTTP/2. This validates the runtime classpath
48+
* has the Jetty http2-server jar and CXF negotiates h2c correctly.
49+
*
50+
* Run with: mvn test -pl tika-e2e-tests/tika-server -Pe2e
51+
*
52+
* Inspired by Lawrence Moorehead's original contribution (elemdisc/tika PR#1, TIKA-4679).
53+
*/
54+
@Tag("E2ETest")
55+
public class TikaServerHttp2Test {
56+
57+
private static final Logger log = LoggerFactory.getLogger(TikaServerHttp2Test.class);
58+
private static final long SERVER_STARTUP_TIMEOUT_MS = 90_000;
59+
/** Health-check polls root (/), which always returns 200 without requiring endpoint config. */
60+
private static final String HEALTH_PATH = "/";
61+
62+
private Process serverProcess;
63+
private int port;
64+
private String endPoint;
65+
66+
@BeforeEach
67+
void startServer() throws Exception {
68+
port = findFreePort();
69+
endPoint = "http://localhost:" + port;
70+
71+
String serverHome = System.getProperty("tika.server.home");
72+
if (serverHome == null) {
73+
// fall back to conventional location relative to this module
74+
Path repoRoot = Paths.get("").toAbsolutePath();
75+
while (repoRoot != null && !repoRoot.resolve("tika-server").toFile().isDirectory()) {
76+
repoRoot = repoRoot.getParent();
77+
}
78+
if (repoRoot == null) {
79+
throw new IllegalStateException("Cannot locate tika root. Pass -Dtika.server.home=/path/to/extracted-assembly");
80+
}
81+
serverHome = repoRoot.resolve("tika-e2e-tests/tika-server/target/tika-server-dist").toAbsolutePath().toString();
82+
}
83+
84+
Path serverJar = Paths.get(serverHome, "tika-server.jar");
85+
Assumptions.assumeTrue(Files.exists(serverJar),
86+
"tika-server.jar not found at " + serverJar + "; skipping HTTP/2 e2e test. " +
87+
"Build with: mvn package -pl tika-server/tika-server-standard && " +
88+
"mvn test -pl tika-e2e-tests/tika-server -Pe2e");
89+
90+
log.info("Starting tika-server from: {}", serverJar);
91+
ProcessBuilder pb = new ProcessBuilder(
92+
"java", "-jar", "tika-server.jar",
93+
"-p", String.valueOf(port),
94+
"-h", "localhost"
95+
);
96+
pb.directory(Paths.get(serverHome).toFile());
97+
pb.redirectErrorStream(true);
98+
serverProcess = pb.start();
99+
100+
// Drain output in background so the process doesn't block
101+
Thread drainThread = new Thread(() -> {
102+
try (BufferedReader reader = new BufferedReader(
103+
new InputStreamReader(serverProcess.getInputStream(), UTF_8))) {
104+
String line;
105+
while ((line = reader.readLine()) != null) {
106+
log.info("tika-server: {}", line);
107+
}
108+
} catch (Exception e) {
109+
log.debug("Server output stream closed", e);
110+
}
111+
});
112+
drainThread.setDaemon(true);
113+
drainThread.start();
114+
115+
awaitServerStartup();
116+
}
117+
118+
@AfterEach
119+
void stopServer() throws Exception {
120+
if (serverProcess != null && serverProcess.isAlive()) {
121+
serverProcess.destroy();
122+
if (!serverProcess.waitFor(5, java.util.concurrent.TimeUnit.SECONDS)) {
123+
serverProcess.destroyForcibly();
124+
serverProcess.waitFor(30, java.util.concurrent.TimeUnit.SECONDS);
125+
}
126+
}
127+
}
128+
129+
@Test
130+
void testH2cTikaEndpoint() throws Exception {
131+
HttpClient httpClient = HttpClient.newBuilder()
132+
.version(HttpClient.Version.HTTP_2)
133+
.build();
134+
HttpRequest request = HttpRequest.newBuilder()
135+
.uri(URI.create(endPoint + "/tika"))
136+
.header("Accept", "text/plain")
137+
.GET()
138+
.build();
139+
140+
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString(UTF_8));
141+
142+
assertEquals(200, response.statusCode(), "Expected 200 from /tika");
143+
assertEquals(HttpClient.Version.HTTP_2, response.version(),
144+
"Expected HTTP/2 protocol; server may be missing http2-server on classpath");
145+
log.info("HTTP/2 h2c verified: {} {}", response.statusCode(), response.version());
146+
}
147+
148+
@Test
149+
void testH2cParseEndpoint() throws Exception {
150+
HttpClient httpClient = HttpClient.newBuilder()
151+
.version(HttpClient.Version.HTTP_2)
152+
.build();
153+
154+
// First: GET / to negotiate h2c upgrade, establishing an HTTP/2 connection
155+
HttpRequest warmup = HttpRequest.newBuilder()
156+
.uri(URI.create(endPoint + "/"))
157+
.GET()
158+
.build();
159+
httpClient.send(warmup, HttpResponse.BodyHandlers.discarding());
160+
161+
// Now PUT /tika — the existing HTTP/2 connection is reused
162+
byte[] body = "Hello, HTTP/2 world!".getBytes(UTF_8);
163+
HttpRequest request = HttpRequest.newBuilder()
164+
.uri(URI.create(endPoint + "/tika"))
165+
.header("Content-Type", "text/plain")
166+
.PUT(HttpRequest.BodyPublishers.ofByteArray(body))
167+
.build();
168+
169+
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString(UTF_8));
170+
171+
assertEquals(200, response.statusCode(), "Expected 200 from /tika");
172+
assertEquals(HttpClient.Version.HTTP_2, response.version(),
173+
"Expected HTTP/2 protocol on /tika endpoint");
174+
log.info("HTTP/2 parse endpoint verified: {} bytes returned over {}", response.body().length(), response.version());
175+
}
176+
177+
private void awaitServerStartup() throws Exception {
178+
// Use HTTP/1.1 for the health-check poll so we don't depend on HTTP/2 during startup.
179+
// Both connectTimeout and request timeout are set to avoid hanging when Jetty has bound
180+
// the port but CXF has not yet finished initializing (accepts TCP but doesn't respond).
181+
HttpClient pollClient = HttpClient.newBuilder()
182+
.version(HttpClient.Version.HTTP_1_1)
183+
.connectTimeout(Duration.ofSeconds(5))
184+
.build();
185+
186+
Instant deadline = Instant.now().plusMillis(SERVER_STARTUP_TIMEOUT_MS);
187+
while (Instant.now().isBefore(deadline)) {
188+
if (!serverProcess.isAlive()) {
189+
throw new IllegalStateException(
190+
"tika-server process exited unexpectedly with code " + serverProcess.exitValue());
191+
}
192+
try {
193+
HttpRequest pollRequest = HttpRequest.newBuilder()
194+
.uri(URI.create(endPoint + HEALTH_PATH))
195+
.timeout(Duration.ofSeconds(5))
196+
.GET()
197+
.build();
198+
HttpResponse<Void> resp = pollClient.send(pollRequest, HttpResponse.BodyHandlers.discarding());
199+
if (resp.statusCode() == 200) {
200+
log.info("tika-server ready on port {}", port);
201+
return;
202+
}
203+
log.debug("Server returned {} on {}; still waiting...", resp.statusCode(), HEALTH_PATH);
204+
} catch (Exception e) {
205+
log.debug("Waiting for server on port {}: {}", port, e.getMessage());
206+
}
207+
Thread.sleep(1000);
208+
}
209+
throw new IllegalStateException("tika-server did not start within " + SERVER_STARTUP_TIMEOUT_MS + " ms");
210+
}
211+
212+
private static int findFreePort() throws Exception {
213+
try (ServerSocket s = new ServerSocket(0)) {
214+
return s.getLocalPort();
215+
}
216+
}
217+
}

tika-parent/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,11 @@
593593
<artifactId>http2-common</artifactId>
594594
<version>${jetty.http2.version}</version>
595595
</dependency>
596+
<dependency>
597+
<groupId>org.eclipse.jetty.http2</groupId>
598+
<artifactId>http2-server</artifactId>
599+
<version>${jetty.http2.version}</version>
600+
</dependency>
596601
<dependency>
597602
<groupId>org.jsoup</groupId>
598603
<artifactId>jsoup</artifactId>

tika-server/tika-server-core/pom.xml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@
8989
<groupId>org.apache.cxf</groupId>
9090
<artifactId>cxf-rt-transports-http-jetty</artifactId>
9191
</dependency>
92+
<dependency>
93+
<groupId>org.eclipse.jetty.http2</groupId>
94+
<artifactId>http2-server</artifactId>
95+
</dependency>
9296
<dependency>
9397
<groupId>org.apache.cxf</groupId>
9498
<artifactId>cxf-rt-rs-security-cors</artifactId>
@@ -261,4 +265,4 @@
261265
<scm>
262266
<tag>3.0.0-rc1</tag>
263267
</scm>
264-
</project>
268+
</project>

0 commit comments

Comments
 (0)