Skip to content

Commit 5f73433

Browse files
NUTCH-3174 protocol-okhttp: request may hang despite http.time.limit being set
- set OkHttp's call timeout to the value of http.time.limit (if not -1)
- add a check that warns if http.time.limit (seconds) is shorter than http.timeout (milliseconds)
- more verbose logging
1 parent 31c44b2 commit 5f73433

3 files changed

Lines changed: 25 additions & 7 deletions

File tree

src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -223,6 +223,12 @@ public void setConf(Configuration conf) {
223223
this.timeout = conf.getInt("http.timeout", 10000);
224224
this.maxContent = conf.getInt("http.content.limit", 1024 * 1024);
225225
this.maxDuration = conf.getInt("http.time.limit", -1);
226+
if (maxDuration >= 0 && (maxDuration * 1000) < timeout) {
227+
LOG.warn(
228+
"The configuration property http.time.limit ({} seconds) is less than http.timeout ({} ms), "
229+
+ "the entire request will time out before individual reads are timed out.",
230+
maxDuration, timeout);
231+
}
226232
this.partialAsTruncated = conf.getBoolean("http.partial.truncated", false);
227233
this.userAgent = getAgentString(conf.get("http.agent.name"),
228234
conf.get("http.agent.version"), conf.get("http.agent.description"),
@@ -272,8 +278,8 @@ public void setConf(Configuration conf) {
272278
}
273279

274280
} catch (Exception e) {
275-
this.logger.warn("Failed to read http.agent.rotate.file {}: {}", agentsFile,
276-
StringUtils.stringifyException(e));
281+
this.logger.warn("Failed to read http.agent.rotate.file {}:",
282+
agentsFile, e);
277283
this.userAgentNames = null;
278284
} finally {
279285
if (br != null) {
@@ -314,8 +320,8 @@ public void setConf(Configuration conf) {
314320
}
315321
}
316322
} catch (Exception e) {
317-
this.logger.warn("Failed to read http.agent.host.cookie.file {}: {}",
318-
cookieFile, StringUtils.stringifyException(e));
323+
this.logger.warn("Failed to read http.agent.host.cookie.file {}:",
324+
cookieFile, e);
319325
this.hostCookies = null;
320326
} finally {
321327
if (br != null) {
@@ -614,8 +620,9 @@ protected void logConf() {
614620
this.logger.info("http.proxy.host = {}", this.proxyHost);
615621
this.logger.info("http.proxy.port = {}", this.proxyPort);
616622
this.logger.info("http.proxy.exception.list = {}", this.useProxy);
617-
this.logger.info("http.timeout = {}", this.timeout);
618-
this.logger.info("http.content.limit = {}", this.maxContent);
623+
this.logger.info("http.timeout = {} ms", this.timeout);
624+
this.logger.info("http.time.limit = {} seconds", this.maxDuration);
625+
this.logger.info("http.content.limit = {} bytes", this.maxContent);
619626
this.logger.info("http.agent = {}", this.userAgent);
620627
this.logger.info("http.accept.language = {}", this.acceptLanguage);
621628
this.logger.info("http.accept = {}", this.accept);

src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ public void setConf(Configuration conf) {
119119
.writeTimeout(this.timeout, TimeUnit.MILLISECONDS)
120120
.readTimeout(this.timeout, TimeUnit.MILLISECONDS);
121121

122+
if (this.maxDuration >= 0) {
123+
// timeout for the entire request
124+
builder.callTimeout(this.maxDuration, TimeUnit.SECONDS);
125+
}
126+
122127
if (!this.tlsCheckCertificate) {
123128
try {
124129
SSLContext trustAllSslContext = SSLContext.getInstance("TLS");

src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttpResponse.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package org.apache.nutch.protocol.okhttp;
1818

1919
import java.io.IOException;
20+
import java.io.InterruptedIOException;
2021
import java.lang.invoke.MethodHandles;
2122
import java.net.URL;
2223
import java.util.Base64;
@@ -179,7 +180,12 @@ private final byte[] toByteArray(final ResponseBody responseBody,
179180
} catch (IOException e) {
180181
if (partialAsTruncated && source.getBuffer().size() > 0) {
181182
// treat already fetched content as truncated
182-
truncated.setReason(TruncatedContentReason.DISCONNECT);
183+
if (e instanceof InterruptedIOException) {
184+
// thrown by OkHttp if the call timeout is hit
185+
truncated.setReason(TruncatedContentReason.TIME);
186+
} else {
187+
truncated.setReason(TruncatedContentReason.DISCONNECT);
188+
}
183189
LOG.info("Truncated content for {}, partial fetch caused by:", this.url,
184190
e);
185191
} else {

0 commit comments

Comments
 (0)