Skip to content

Commit e69a442

Browse files
bundoleeclaude
andcommitted
fix: address code review feedback for health check
- Separate connection failure vs server error in DoclingFastServerClient: unreachable → "not available", HTTP 503 → "reachable but unhealthy" - Add comment explaining why HancomClient accepts non-2xx responses - Inline getClient() call in HybridDocumentProcessor - Add comment explaining intentional pre-triage health check - Update test assertions for 503 case and add RFC 5737 note Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b83b23d commit e69a442

4 files changed

Lines changed: 25 additions & 11 deletions

File tree

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/DoclingFastServerClient.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,23 @@ public void checkAvailability() throws IOException {
9999
.get()
100100
.build();
101101

102-
try (Response response = healthClient.newCall(healthRequest).execute()) {
103-
if (!response.isSuccessful()) {
104-
throw new IOException("Hybrid server health check failed with status " + response.code());
105-
}
102+
Response response;
103+
try {
104+
response = healthClient.newCall(healthRequest).execute();
106105
} catch (IOException e) {
107106
throw new IOException(
108107
"Hybrid server is not available at " + baseUrl + "\n"
109108
+ "Please start the server with: opendataloader-pdf-hybrid\n"
110109
+ "Or run without --hybrid flag for Java-only processing.", e);
111110
}
111+
try (response) {
112+
if (!response.isSuccessful()) {
113+
throw new IOException(
114+
"Hybrid server at " + baseUrl + " returned HTTP " + response.code()
115+
+ " during health check.\n"
116+
+ "The server is reachable but may be starting up or unhealthy.");
117+
}
118+
}
112119
}
113120

114121
@Override

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/HancomClient.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,9 @@ public void checkAvailability() throws IOException {
106106
.build();
107107

108108
try (Response response = healthClient.newCall(request).execute()) {
109-
// Any response means the server is reachable
109+
// Any HTTP response (including 4xx/5xx) means the server is reachable.
110+
// Hancom API requires authentication for all endpoints, so a 401/403
111+
// is expected and still proves connectivity.
110112
} catch (IOException e) {
111113
throw new IOException(
112114
"Hybrid server is not available at " + baseUrl + "\n"

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/HybridDocumentProcessor.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,10 @@ public static List<List<IObject>> processDocument(
9898
int totalPages = StaticContainers.getDocument().getNumberOfPages();
9999
LOGGER.log(Level.INFO, "Starting hybrid processing for {0} pages", totalPages);
100100

101-
// Phase 0: Check backend availability before any processing
102-
HybridClient client = getClient(config);
103-
client.checkAvailability();
101+
// Phase 0: Check backend availability before any processing.
102+
// Runs before triage intentionally — if the user explicitly requested hybrid mode,
103+
// they expect the server to be available regardless of how pages would be routed.
104+
getClient(config).checkAvailability();
104105

105106
// Phase 1: Filter all pages and collect filtered contents
106107
Map<Integer, List<IObject>> filteredContents = filterAllPages(inputPdfName, config, pagesToProcess, totalPages);

java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/HealthCheckTest.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,10 @@ void testDoclingHealthCheckFailsOnServerError() throws IOException {
9696

9797
try {
9898
IOException exception = assertThrows(IOException.class, client::checkAvailability);
99-
assertTrue(exception.getMessage().contains("not available"),
100-
"Error message should indicate server is not available");
99+
assertTrue(exception.getMessage().contains("returned HTTP 503"),
100+
"Error message should include the HTTP status code");
101+
assertTrue(exception.getMessage().contains("reachable but"),
102+
"Error message should indicate server is reachable but unhealthy");
101103
} finally {
102104
client.shutdown();
103105
}
@@ -141,7 +143,9 @@ void testHancomHealthCheckFailsWhenServerDown() throws IOException {
141143

142144
@Test
143145
void testHealthCheckTimesOutQuickly() throws IOException {
144-
// Use a non-routable IP to trigger connect timeout
146+
// Uses TEST-NET IP (RFC 5737) to trigger a connect timeout.
147+
// Some CI environments may reject packets instantly instead of timing out,
148+
// but the upper-bound assertion (< 10s) still holds in either case.
145149
String baseUrl = "http://192.0.2.1:9999";
146150
DoclingFastServerClient client = new DoclingFastServerClient(
147151
baseUrl, new OkHttpClient(), new ObjectMapper());

0 commit comments

Comments
 (0)