Skip to content

Commit 44d1bf5

Browse files
bushidocodes and claude committed
test: add repro script for issue #185 TCP-reset bug
Self-contained script that reproduces the "connection reset by peer" / "Unsolicited response received on idle HTTP channel" failure behind #185: launches sledgert with a single valid route, then POSTs a sizeable body to a non-existent route so the server emits a 404 (matched at the request line, in on_client_request_receiving) and close()s while the body is still in flight, producing a RST. Reports the reset count and prints BUG REPRODUCED when nonzero. Run on an unpatched checkout to see the bug; on this branch the graceful-close fix drops the reset count to 0. Helps reviewers reproduce the concrete failure case requested in PR #389 review. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 9e66463 commit 44d1bf5

1 file changed

Lines changed: 179 additions & 0 deletions

File tree

repro_issue_185.sh

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
#!/usr/bin/env bash
2+
#
3+
# repro_issue_185.sh — Reproduce the TCP-reset bug behind issue #185 / PR #389.
4+
#
5+
# Symptom: `hey`/clients see "connection reset by peer" (and pooled keepalive
6+
# clients log `Unsolicited response received on idle HTTP channel starting with
7+
# "HTTP/1.1 ..."`) when SLEdge answers a request *before* it has finished reading
8+
# the request body.
9+
#
10+
# Why a plain bodyless GET does NOT reproduce it:
11+
# 429/500/503 are emitted from on_client_request_received(), which only runs
12+
# after the FULL request (body included) has been read. So those are clean.
13+
#
14+
# The reproducible path is 404. In on_client_request_receiving()
15+
# (runtime/src/listener_thread.c, ~line 200) the route is matched the moment the
16+
# URL is parsed from the *request line* — before the body arrives:
17+
#
18+
# if (session->route == NULL && strlen(session->http_request.full_url) > 0) {
19+
# route = http_router_match_route(...);
20+
# if (route == NULL) { ...404...; on_client_response_header_sending(); return; }
21+
# }
22+
#
23+
# So: POST a sizeable body to a NON-existent route. SLEdge writes the 404 and
24+
# close()es the socket while the client is still sending. On Linux, close() with
25+
# unread data in the kernel receive buffer discards it and emits a RST instead of
26+
# a graceful FIN -> "connection reset by peer".
27+
#
28+
# Expected result:
29+
# * On master / fix/docker-dev-setup (unpatched): a nonzero number of
30+
# "connection reset by peer" errors -> BUG REPRODUCED.
31+
# * On fix/issue-185-graceful-close (patched tcp_session_close): 0 resets.
32+
#
33+
# Usage:
34+
# ./repro_issue_185.sh
35+
#
36+
# Tunables (env vars):
37+
# PORT=10000 tenant listen port
38+
# REQUESTS=2400 total requests (hey -n)
39+
# CONCURRENCY=32 concurrent connections (hey -c)
40+
# BODY_BYTES=100000 request body size (~100 KB); larger = more resets
41+
#
42+
set -euo pipefail
43+
44+
# Tunables: a non-empty value supplied through the environment wins; otherwise
# the default on the right is assigned.
: "${PORT:=10000}"         # tenant listen port
: "${REQUESTS:=2400}"      # total requests (hey -n)
: "${CONCURRENCY:=32}"     # concurrent connections (hey -c)
: "${BODY_BYTES:=100000}"  # request body size passed to head -c
48+
49+
# --- locate the repo (this script lives at the repo root) ---------------------
50+
# Resolve the directory holding this script to a physical path (pwd -P); the
# script is expected to sit at the repo root.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
REPO_ROOT="$(cd "$script_dir" && pwd -P)"

# Build artifacts this script needs, all under runtime/bin.
BIN_DIR="${REPO_ROOT}/runtime/bin"
SLEDGERT="${BIN_DIR}/sledgert"
WASM="${BIN_DIR}/empty.wasm.so"
54+
55+
# Print all arguments (joined as "$*") on one line in bold red, on stdout.
red() {
  local text="$*"
  printf '\033[1;31m%s\033[0m\n' "$text"
}
56+
# Print all arguments (joined as "$*") on one line in green, on stdout.
green() {
  local text="$*"
  printf '\033[0;32m%s\033[0m\n' "$text"
}
57+
# Print all arguments (joined as "$*") on one line in cyan with a "==> "
# prefix, on stdout.
info() {
  local text="$*"
  printf '\033[0;36m==> %s\033[0m\n' "$text"
}
58+
59+
# --- prerequisites ------------------------------------------------------------
60+
# 'hey' generates the load that triggers the bug.
if ! command -v hey >/dev/null 2>&1; then
  red "ERROR: 'hey' is not installed. Install it with:"
  echo " go install github.com/rakyll/hey@latest (then add \$(go env GOPATH)/bin to PATH)"
  echo " or: apt-get install -y hey"
  exit 1
fi

# 'curl' performs the sanity requests before the load run. Without this check a
# missing curl only shows up later as an empty status code and a misleading
# "not serving the valid route" error.
if ! command -v curl >/dev/null 2>&1; then
  red "ERROR: 'curl' is not installed (needed for the pre-load sanity checks)."
  exit 1
fi

# The runtime binary must exist and be executable.
if [[ ! -x "$SLEDGERT" ]]; then
  red "ERROR: $SLEDGERT not found. Build the runtime first, e.g.:"
  echo " make runtime # builds runtime/bin/sledgert"
  echo " make install # full build incl. wasm apps"
  exit 1
fi

# The sample module served on the one valid route must have been built.
if [[ ! -f "$WASM" ]]; then
  red "ERROR: $WASM not found. Build the sample apps first, e.g.:"
  echo " make install # builds the wasm apps incl. empty.wasm.so"
  exit 1
fi

# Warn (don't block) if running on a branch that already contains the fix.
# A missing or unreadable header simply means "no warning" (grep's stderr is
# dropped on purpose).
if grep -q 'shutdown(client_socket' "$REPO_ROOT/runtime/include/tcp_session.h" 2>/dev/null; then
  info "NOTE: tcp_session.h contains the graceful-close fix — you are on a PATCHED"
  info " checkout, so you should see 0 resets (the fix working). To see the BUG,"
  info " check out an unpatched branch (e.g. master) and 'make runtime' first."
fi
86+
87+
# --- temp files and cleanup ---------------------------------------------------
# The paths start out empty so the EXIT trap can be installed BEFORE anything is
# created; a failure part-way through setup then still removes what exists.
SPEC=""
BODY=""
LOG=""
SLEDGE_PID=""

# Stop sledgert (if it was started) and delete every temp file of this run.
# Globals: SLEDGE_PID, SPEC, BODY, LOG (read); HEY_OUT (read, may be unset).
cleanup() {
  local path
  if [[ -n "$SLEDGE_PID" ]]; then
    # The process may already have exited; that is not an error.
    kill "$SLEDGE_PID" 2>/dev/null || true
  fi
  # HEY_OUT is only assigned once the load run starts, hence the default.
  for path in "$SPEC" "$BODY" "$LOG" "${HEY_OUT:-}"; do
    if [[ -n "$path" ]]; then
      rm -f -- "$path"
    fi
  done
}
trap cleanup EXIT

# --- write a minimal tenant spec ----------------------------------------------
# One tenant listening on $PORT with a single valid route, /empty.
SPEC="$(mktemp /tmp/issue185-spec.XXXXXX.json)"
cat > "$SPEC" <<EOF
[
{
"name": "gwu",
"port": $PORT,
"routes": [
{
"route": "/empty",
"path": "empty.wasm.so",
"admissions-percentile": 70,
"relative-deadline-us": 50000,
"http-resp-content-type": "text/plain"
}
]
}
]
EOF

# --- generate the request body ------------------------------------------------
# BODY_BYTES copies of the letter 'x'.
BODY="$(mktemp /tmp/issue185-body.XXXXXX)"
head -c "$BODY_BYTES" /dev/zero | tr '\0' 'x' > "$BODY"

# --- launch sledgert ----------------------------------------------------------
# Captures sledgert's stdout and stderr.
LOG="$(mktemp /tmp/issue185-sledge.XXXXXX.log)"
120+
121+
info "Starting sledgert on port $PORT (route /empty -> empty.wasm.so)"
# Run sledgert from runtime/bin: the spec's relative "empty.wasm.so" path is
# resolved against the CWD, and runtime/bin has to be on LD_LIBRARY_PATH for
# libsledge/libck. The final 'exec' replaces the subshell with sledgert, so $!
# below is sledgert's own PID and cleanup can kill it directly.
(
  cd "$BIN_DIR" || exit
  export LD_LIBRARY_PATH="$BIN_DIR:${LD_LIBRARY_PATH:-}"
  exec "$SLEDGERT" "$SPEC"
) >"$LOG" 2>&1 &
SLEDGE_PID=$!
127+
128+
# --- wait for the tenant port to accept connections ---------------------------
129+
info "Waiting for port $PORT to come up..."
# Poll up to 50 times, 0.2 s apart, bailing out early if sledgert dies.
port_ready=0
for ((attempt = 0; attempt < 50; attempt++)); do
  if ! kill -0 "$SLEDGE_PID" 2>/dev/null; then
    red "sledgert exited during startup. Log:"
    cat "$LOG"
    exit 1
  fi
  # The probe connection is opened inside a subshell, so its descriptor is
  # closed when the subshell exits and nothing has to be closed here. (A bare
  # 'exec ... 2>/dev/null' in this shell would silence stderr for the rest of
  # the script.)
  if (exec 3<>"/dev/tcp/127.0.0.1/$PORT") 2>/dev/null; then
    port_ready=1
    break
  fi
  sleep 0.2
done

# Fail with a specific message instead of falling through to the sanity checks.
if [[ "$port_ready" -ne 1 ]]; then
  red "Timed out waiting for port $PORT to accept connections. Log:"
  cat "$LOG"
  exit 1
fi
142+
143+
# Sanity checks: valid route -> 200, missing route (bodyless) -> 404 (clean).
144+
# Print the HTTP status code curl reports for the given request arguments. The
# response body is discarded, and a curl failure does not abort the script.
http_status() {
  curl -s -o /dev/null -w '%{http_code}' "$@" || true
}

base_url="http://127.0.0.1:$PORT"
ok="$(http_status -X POST --data hi "$base_url/empty")"
nf="$(http_status "$base_url/nope")"
info "Sanity: POST /empty -> $ok , GET /nope (bodyless) -> $nf"

# Only the valid route is enforced; the 404 probe is reported but not checked.
if [[ "$ok" != "200" ]]; then
  red "sledgert is not serving the valid route as expected. Log:"
  cat "$LOG"
  exit 1
fi
152+
153+
# --- the load that triggers the bug -------------------------------------------
154+
# Total the occurrences of an error message in a hey report.
# hey's "Error distribution" section lists each distinct message once, prefixed
# with its occurrence count in brackets ("[3]<TAB>message"), so counting
# matching lines under-reports any message that occurred more than once. A
# matching line without such a prefix counts as one occurrence.
# Arguments: $1 - literal substring to look for
#            $2 - path to the captured hey output
# Outputs:   the total on stdout (0 when nothing matches)
count_hey_errors() {
  local needle=$1 report=$2
  awk -v needle="$needle" '
    index($0, needle) {
      if ($1 ~ /^\[[0-9]+\]$/) total += substr($1, 2, length($1) - 2)
      else total += 1
    }
    END { print total + 0 }
  ' "$report"
}

info "Firing $REQUESTS POSTs ($BODY_BYTES-byte body, concurrency $CONCURRENCY) at /nope (non-existent route)"
HEY_OUT="$(mktemp /tmp/issue185-hey.XXXXXX)"
# A non-zero exit from hey is tolerated: its report is captured and inspected
# below either way.
hey -n "$REQUESTS" -c "$CONCURRENCY" -m POST -D "$BODY" "http://127.0.0.1:$PORT/nope" > "$HEY_OUT" 2>&1 || true

echo
echo "----- hey status code distribution -----"
sed -n '/Status code distribution/,/^$/p' "$HEY_OUT" || true

RESETS="$(count_hey_errors 'connection reset by peer' "$HEY_OUT" || true)"
EPIPES="$(count_hey_errors 'broken pipe' "$HEY_OUT" || true)"
rm -f "$HEY_OUT"
166+
167+
# Final summary: print both error totals, then the verdict. Any reset at all
# counts as a reproduction.
rule="============================================================"
printf '\n'
printf '%s\n' \
  "$rule" \
  " 'connection reset by peer' errors : $RESETS" \
  " 'broken pipe' (EPIPE) errors : $EPIPES (known large-body limitation)" \
  "$rule"

reset_total="${RESETS:-0}"
if [[ "$reset_total" -gt 0 ]]; then
  red "BUG REPRODUCED: SLEdge sent RSTs on early 404 responses (issue #185)."
  printf '%s\n' "On fix/issue-185-graceful-close this count drops to 0."
else
  green "No resets observed. If you are on the patched branch, this is the FIX working."
  printf '%s\n' \
    "If you expected the bug: confirm you 'make runtime' on an unpatched branch," \
    "and try a larger BODY_BYTES (e.g. BODY_BYTES=1000000) or higher CONCURRENCY."
fi

0 commit comments

Comments
 (0)