1818import com .tencent .trpc .core .rpc .AbstractRpcClient ;
1919import com .tencent .trpc .core .rpc .ConsumerInvoker ;
2020import java .io .IOException ;
21+ import java .util .concurrent .TimeUnit ;
2122import org .apache .http .impl .client .CloseableHttpClient ;
2223import org .apache .http .impl .client .HttpClients ;
2324import org .apache .http .impl .conn .PoolingHttpClientConnectionManager ;
2425
2526/**
2627 * HTTP protocol client.
28+ * <p>Long-connection mode: connections are pooled by Apache {@link PoolingHttpClientConnectionManager}
29+ * and reused across requests via HTTP/1.1 keep-alive. Two safeguards are enabled by default to
30+ * keep the pool healthy in long-running processes:
31+ * <ul>
32+ * <li>{@code validateAfterInactivity}: re-check a connection's liveness before reuse if it
33+ * has been idle for a short period (avoids the classic "stale connection / NoHttpResponseException"
34+ * when the server has half-closed an idle keep-alive connection);</li>
35+ * <li>{@code evictIdleConnections}: a small background thread evicts connections that have
36+ * been idle longer than the configured limit, freeing OS file descriptors.</li>
37+ * </ul>
2738 */
2839public class HttpRpcClient extends AbstractRpcClient {
2940
3041 private static final Logger logger = LoggerFactory .getLogger (HttpRpcClient .class );
3142
43+ /**
44+ * Validate a pooled connection before reuse if it has been idle for at least this many
45+ * milliseconds. Cheap heuristic that catches most server-side half-closed keep-alive sockets.
46+ */
47+ private static final int VALIDATE_AFTER_INACTIVITY_MS = 2000 ;
48+ /**
49+ * Evict pooled connections that have been idle for longer than this duration.
50+ */
51+ private static final long EVICT_IDLE_CONNECTIONS_SECONDS = 60L ;
52+ /**
53+ * If this client has not been used by any RPC for longer than this window, the periodic
54+ * scanner in {@code RpcClusterClientManager} will treat it as unavailable. After
55+ * a few consecutive unavailable observations the client gets closed and evicted from the
56+ * cluster cache, which is how we reclaim {@link HttpRpcClient} instances orphaned by backend
57+ * IP rotation (e.g. K8s pod IP drift). The window is intentionally large so that any
58+ * actively-used client is never affected.
59+ */
60+ private static final long IDLE_UNAVAILABLE_THRESHOLD_NANOS =
61+ java .util .concurrent .TimeUnit .MINUTES .toNanos (10 );
62+
3263 private CloseableHttpClient httpClient ;
64+ /**
65+ * Timestamp (System.nanoTime()) of the most recent RPC sent through this client. Updated by
66+ * {@link HttpConsumerInvoker} on each send.
67+ */
68+ private volatile long lastUsedNanos = System .nanoTime ();
3369
3470 public HttpRpcClient (ProtocolConfig config ) {
3571 setConfig (config );
@@ -44,7 +80,16 @@ protected void doOpen() {
4480 // If there is only one route, the maximum number of connections for a single route is the same
4581 // as the maximum number of connections for the entire connection pool.
4682 cm .setDefaultMaxPerRoute (maxConns );
47- httpClient = HttpClients .custom ().setConnectionManager (cm ).build ();
83+ // Re-validate idle pooled connections before reuse so we do not send a request through a
84+ // socket the server has already half-closed.
85+ cm .setValidateAfterInactivity (VALIDATE_AFTER_INACTIVITY_MS );
86+ httpClient = HttpClients .custom ()
87+ .setConnectionManager (cm )
88+ // Background eviction of stale & long-idle connections; keeps the pool tidy in
89+ // long-running processes without affecting hot connections.
90+ .evictExpiredConnections ()
91+ .evictIdleConnections (EVICT_IDLE_CONNECTIONS_SECONDS , TimeUnit .SECONDS )
92+ .build ();
4893 }
4994
5095 @ Override
@@ -64,6 +109,28 @@ public <T> ConsumerInvoker<T> createInvoker(ConsumerConfig<T> consumerConfig) {
64109 return new HttpConsumerInvoker <>(this , consumerConfig , protocolConfig );
65110 }
66111
112+ /**
113+ * Record that this client just served (or is about to serve) an RPC. Called by
114+ * {@link HttpConsumerInvoker} on every request.
115+ */
116+ public void markUsed () {
117+ lastUsedNanos = System .nanoTime ();
118+ }
119+
120+ /**
121+ * Reports the client as unavailable if it has been idle longer than
122+ * {@link #IDLE_UNAVAILABLE_THRESHOLD_NANOS}. This lets the cluster manager's periodic
123+ * reconnect-check timer eventually evict orphaned clients (e.g. after backend IP rotation)
124+ * even though Apache HttpClient itself has no notion of "remote permanently gone".
125+ */
126+ @ Override
127+ public boolean isAvailable () {
128+ if (!super .isAvailable ()) {
129+ return false ;
130+ }
131+ return (System .nanoTime () - lastUsedNanos ) <= IDLE_UNAVAILABLE_THRESHOLD_NANOS ;
132+ }
133+
67134 public CloseableHttpClient getHttpClient () {
68135 return httpClient ;
69136 }
0 commit comments