Skip to content

Commit 395bbaf

Browse files
authored
Add connect_down_policy: 4 for empty reply (#13069)
* Add connect_down_policy: 4 for empty reply * Fix AuTest failure on Linux
1 parent afdf454 commit 395bbaf

5 files changed

Lines changed: 193 additions & 20 deletions

File tree

doc/admin-guide/files/records.yaml.en.rst

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,12 +1834,24 @@ Origin Server Connect Attempts
18341834
:overridable:
18351835

18361836
Controls what origin server connection failures contribute to marking a server down.
1837-
When set to ``2``, any connection failure during the TCP and TLS handshakes will
1838-
contribute to marking the server down. When set to ``1``, only TCP handshake failures
1839-
will contribute to marking a server down. When set to ``0``, no connection failures
1840-
will be used towards marking a server down. When set to ``3``, all failures covered
1841-
by ``2`` plus transaction inactive timeouts (server goes silent after connection is
1842-
established) will contribute to marking a server down.
1837+
1838+
+-------+-----------------------------------------------------------------------+
1839+
| Value | Behavior |
1840+
+=======+=======================================================================+
1841+
| ``0`` | No connection failures contribute to marking a server down. |
1842+
+-------+-----------------------------------------------------------------------+
1843+
| ``1`` | TCP handshake failures (excluding TLS handshake failures) contribute |
1844+
| | to marking a server down. |
1845+
+-------+-----------------------------------------------------------------------+
1846+
| ``2`` | Any connection failure during the TCP or TLS handshake contributes to |
1847+
| | marking a server down. |
1848+
+-------+-----------------------------------------------------------------------+
1849+
| ``3`` | All failures covered by ``2``, plus transaction inactive timeouts |
1850+
| | (server goes silent after the connection is established). |
1851+
+-------+-----------------------------------------------------------------------+
1852+
| ``4`` | All failures covered by ``3``, plus cases where the origin closes the |
1853+
| | connection before sending any response bytes. |
1854+
+-------+-----------------------------------------------------------------------+
18431855

18441856
.. ts:cv:: CONFIG proxy.config.http.server_max_connections INT 0
18451857
:reloadable:

src/proxy/http/HttpSM.cc

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4676,22 +4676,39 @@ HttpSM::do_hostdb_reverse_lookup()
46764676
bool
46774677
HttpSM::track_connect_fail() const
46784678
{
4679-
bool retval = false;
4680-
if (t_state.current.server->had_connect_fail()) {
4681-
// What does our policy say?
4682-
if (t_state.txn_conf->connect_down_policy == 2 ||
4683-
t_state.txn_conf->connect_down_policy == 3) { // Any connection error through TLS handshake
4684-
retval = true;
4685-
} else if (t_state.txn_conf->connect_down_policy == 1) { // Any connection error through TCP
4686-
retval = t_state.current.server->connect_result != -ENET_SSL_CONNECT_FAILED;
4687-
}
4679+
int const policy = t_state.txn_conf->connect_down_policy;
4680+
4681+
// Policy 1: any TCP-level connect error (excluding TLS handshake failures).
4682+
if (policy == 1 && t_state.current.server->had_connect_fail()) {
4683+
return t_state.current.server->connect_result != -ENET_SSL_CONNECT_FAILED;
46884684
}
4689-
// Policy 3 additionally marks the server down on transaction inactive timeout,
4690-
// even when had_connect_fail() is false (connect_result was cleared at CONNECTION_ALIVE).
4691-
if (!retval && t_state.txn_conf->connect_down_policy == 3) {
4692-
retval = (t_state.current.server->state == HttpTransact::INACTIVE_TIMEOUT);
4685+
4686+
// Policy 2+: any connect error including TLS handshake failures.
4687+
if (policy >= 2 && t_state.current.server->had_connect_fail()) {
4688+
return true;
46934689
}
4694-
return retval;
4690+
4691+
// Policy 3+: inactive timeout (connect_result was cleared at CONNECTION_ALIVE).
4692+
if (policy >= 3 && t_state.current.server->state == HttpTransact::INACTIVE_TIMEOUT) {
4693+
return true;
4694+
}
4695+
4696+
// Policy 4+: origin closed a fresh connection before sending any response bytes.
4697+
// Excludes two cases:
4698+
// - Reused keep-alive connection: there is a known race between ATS reusing the connection and the origin closing it.
4699+
// - Multiplexed origins (HTTP/2): stream-level failure does not indicate a connection failure.
4700+
if (policy >= 4) {
4701+
bool multiplexed = false;
4702+
auto ssn = server_txn->get_proxy_ssn();
4703+
if (ssn != nullptr) {
4704+
multiplexed = static_cast<PoolableSession *>(ssn)->is_multiplexing();
4705+
}
4706+
if (!multiplexed && server_txn->is_first_transaction() && server_response_hdr_bytes == 0) {
4707+
return true;
4708+
}
4709+
}
4710+
4711+
return false;
46954712
}
46964713

46974714
void

tests/gold_tests/connect_down_policy/connect_down_policy_3.test.py renamed to tests/gold_tests/connect_down_policy/connect_down_policy.test.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,6 @@ def run(self):
111111

112112
# Policy 2: inactive timeout should NOT mark the origin down.
113113
ConnectDownPolicy3Test(policy=2, expect_mark_down=False).run()
114+
115+
# Policy 4: origin closes connection without sending any response.
116+
Test.ATSReplayTest(replay_file="replay/connect_down_policy_4.replay.yaml")
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
`` Server closed connection while reading response header. (origin backend1.example.com:``/path/)
2+
`` CONNECT : SUCCESS [0] connecting to 127.0.0.1:`` for host='one.example.com' url='http://backend1.example.com:``/path/' fail_count='1' marking down
3+
`` DNS Error: no valid server http://backend1.example.com:``/path/
4+
`` Server closed connection while reading response header. (origin backend2.example.com:``/path/)
5+
`` CONNECT : SUCCESS [0] connecting to 127.0.0.1:`` for host='two.example.com' url='http://backend2.example.com:``/path/' fail_count='1' marking down
6+
`` DNS Error: no valid server http://backend2.example.com:``/path/
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
#
18+
# This replay file assumes that caching is disabled.
19+
#
20+
21+
meta:
22+
version: "1.0"
23+
24+
# Configuration section for autest integration
25+
autest:
26+
description: 'Verify connect down policy 4 - origin closes connection without sending a response'
27+
28+
dns:
29+
name: 'dns-policy-4'
30+
records:
31+
backend1.example.com: ["127.0.0.1"]
32+
backend2.example.com: ["127.0.0.1"]
33+
34+
server:
35+
name: 'server-policy-4'
36+
37+
client:
38+
name: 'client-policy-4'
39+
40+
ats:
41+
name: 'ts-policy-4'
42+
process_config:
43+
enable_cache: false
44+
45+
records_config:
46+
proxy.config.diags.debug.enabled: 1
47+
proxy.config.diags.debug.tags: 'http|hostdb'
48+
proxy.config.http.connect.down.policy: 4
49+
proxy.config.http.connect_attempts_rr_retries: 0
50+
proxy.config.http.connect_attempts_max_retries: 0
51+
proxy.config.http.connect_attempts_max_retries_down_server: 0
52+
proxy.config.http.connect_attempts_timeout: 1
53+
proxy.config.http.down_server.cache_time: 5
54+
55+
remap_config:
56+
- from: "http://one.example.com/"
57+
to: "http://backend1.example.com:{SERVER_HTTP_PORT}/"
58+
- from: "http://two.example.com/"
59+
to: "http://backend2.example.com:{SERVER_HTTP_PORT}/"
60+
61+
log_validation:
62+
error_log:
63+
gold_file: "gold/connect_down_policy_4_error_log.gold"
64+
65+
sessions:
66+
- transactions:
67+
# on_connect: refuse - Server accepts TCP connection but immediately closes it without sending any HTTP response
68+
- client-request:
69+
method: GET
70+
url: /path/
71+
version: '1.1'
72+
headers:
73+
fields:
74+
- [Host, one.example.com]
75+
- [uuid, 1]
76+
77+
server-response:
78+
on_connect: refuse
79+
80+
proxy-response:
81+
status: 502
82+
83+
# Verify the origin is marked down after the close.
84+
- client-request:
85+
method: GET
86+
url: /path/
87+
version: '1.1'
88+
headers:
89+
fields:
90+
- [Host, one.example.com]
91+
- [uuid, 10]
92+
93+
proxy-request:
94+
expect: absent
95+
96+
server-response:
97+
status: 200
98+
99+
proxy-response:
100+
status: 500
101+
102+
# on_connect: reset - Server accepts TCP connection but immediately resets it without sending any HTTP response
103+
- client-request:
104+
method: GET
105+
url: /path/
106+
version: '1.1'
107+
headers:
108+
fields:
109+
- [Host, two.example.com]
110+
- [uuid, 2]
111+
112+
server-response:
113+
on_connect: reset
114+
115+
proxy-response:
116+
status: 502
117+
118+
# Verify the origin is marked down after the reset.
119+
- client-request:
120+
method: GET
121+
url: /path/
122+
version: '1.1'
123+
headers:
124+
fields:
125+
- [Host, two.example.com]
126+
- [uuid, 20]
127+
128+
proxy-request:
129+
expect: absent
130+
131+
server-response:
132+
status: 200
133+
134+
proxy-response:
135+
status: 500

0 commit comments

Comments
 (0)