Skip to content

Commit 51d5d0e

Browse files
ThomasK33claude
andauthored
fix(server): close connection on malformed WebSocket frames instead of stalling (#258)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Signed-off-by: Thomas Kosiewski <tk@coder.com>
1 parent 41ef22b commit 51d5d0e

3 files changed

Lines changed: 381 additions & 19 deletions

File tree

lua/claudecode/server/client.lua

Lines changed: 84 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,33 @@ function M.process_data(client, data, on_message, on_close, on_error, auth_token
114114
end
115115

116116
while #client.buffer >= 2 do -- Minimum frame size
117-
local parsed_frame, bytes_consumed = frame.parse_frame(client.buffer)
117+
-- Stop if a prior frame (or a TCP error) already initiated teardown. The
118+
-- handle can still be open (Close-frame write / scheduled on_close/on_error
119+
-- pending) and read is not stopped, so a later TCP segment could otherwise
120+
-- re-enter process_data and dispatch frames against a closing/closed client.
121+
if client.state == "closing" or client.state == "closed" then
122+
break
123+
end
124+
125+
local parsed_frame, bytes_consumed, close_code = frame.parse_frame(client.buffer)
118126

119127
if not parsed_frame then
120-
break
128+
if close_code then
129+
-- Fatal protocol violation: send the RFC 6455 Close frame, drop the
130+
-- offending bytes (so they are not re-parsed forever, which previously
131+
-- wedged the connection), and tear the connection down.
132+
--
133+
-- on_error runs the synchronous teardown (tcp.lua _disconnect_client ->
134+
-- _remove_client -> tcp_handle:close()), which would cancel the pending
135+
-- Close-frame write if it ran first. Passing it as close_client's on_done
136+
-- runs it from the write callback, i.e. only after the status code has
137+
-- been flushed, so the peer actually receives the 1002/1007/1009 close.
138+
client.buffer = ""
139+
M.close_client(client, close_code, "Protocol error", function()
140+
on_error(client, "WebSocket protocol error in frame")
141+
end)
142+
end
143+
break -- No close_code means the frame is incomplete; wait for more bytes.
121144
end
122145

123146
-- Frame validation is now handled entirely within frame.parse_frame.
@@ -155,18 +178,35 @@ function M.process_data(client, data, on_message, on_close, on_error, auth_token
155178
vim.schedule(function()
156179
on_close(client, code, reason)
157180
end)
181+
-- A CLOSE is terminal: drop any bytes that followed it (a frame after CLOSE
182+
-- is a protocol violation) and stop iterating, so we neither echo a PONG
183+
-- after our own Close nor dispatch on_message after on_close. Matches the
184+
-- other termination branches.
185+
client.buffer = ""
186+
break
158187
elseif parsed_frame.opcode == frame.OPCODE.PING then
159188
local pong_frame = frame.create_pong_frame(parsed_frame.payload)
160189
client.tcp_handle:write(pong_frame)
161190
elseif parsed_frame.opcode == frame.OPCODE.PONG then
162191
client.last_pong = vim.loop.now()
163192
elseif parsed_frame.opcode == frame.OPCODE.CONTINUATION then
164-
-- Continuation frame - for simplicity, we don't support fragmentation
165-
on_error(client, "Fragmented messages not supported")
166-
M.close_client(client, 1003, "Unsupported data")
193+
-- Continuation frame - for simplicity, we don't support fragmentation.
194+
-- Run on_error from close_client's write callback (so the Close frame is
195+
-- flushed before teardown), then clear the buffer and break so we stop
196+
-- iterating over a connection that is being torn down. See the fatal-frame
197+
-- branch above for the rationale.
198+
M.close_client(client, 1003, "Unsupported data", function()
199+
on_error(client, "Fragmented messages not supported")
200+
end)
201+
client.buffer = ""
202+
break
167203
else
168-
on_error(client, "Unknown WebSocket opcode: " .. parsed_frame.opcode)
169-
M.close_client(client, 1002, "Protocol error")
204+
local opcode = parsed_frame.opcode
205+
M.close_client(client, 1002, "Protocol error", function()
206+
on_error(client, "Unknown WebSocket opcode: " .. opcode)
207+
end)
208+
client.buffer = ""
209+
break
170210
end
171211
end
172212
end
@@ -204,26 +244,59 @@ end
204244
---@param client WebSocketClient The client object
205245
---@param code number|nil Close code (default: 1000)
206246
---@param reason string|nil Close reason
207-
function M.close_client(client, code, reason)
247+
---@param on_done function|nil Optional callback run after the Close frame has been
248+
--- written (or once the connection is already gone). Protocol-violation paths use
249+
--- it to defer error/teardown until the status code has actually been flushed.
250+
function M.close_client(client, code, reason, on_done)
208251
if client.state == "closed" or client.state == "closing" then
252+
if on_done then
253+
vim.schedule(on_done)
254+
end
209255
return
210256
end
211257

212258
code = code or 1000
213259
reason = reason or ""
214260

261+
-- Defensive guard for callers that don't gate on client.state: if the
262+
-- underlying handle is already closing (e.g. a TCP error path closed it),
263+
-- writing a Close frame would target a dead handle and never reach the peer.
264+
-- Just mark the client closed in that case. Mirrors the is_closing() check
265+
-- in tcp.lua's _remove_client.
266+
if client.tcp_handle:is_closing() then
267+
client.state = "closed"
268+
if on_done then
269+
vim.schedule(on_done)
270+
end
271+
return
272+
end
273+
215274
if client.handshake_complete then
216275
local close_frame = frame.create_close_frame(code, reason)
217276
client.tcp_handle:write(close_frame, function()
218277
client.state = "closed"
219-
client.tcp_handle:close()
278+
if not client.tcp_handle:is_closing() then
279+
client.tcp_handle:close()
280+
end
281+
-- Run any teardown only after the Close frame has been flushed, so a
282+
-- caller's synchronous handle close cannot cancel the pending write. libuv
283+
-- always invokes this callback (on success or error), so on_done is never
284+
-- dropped.
285+
if on_done then
286+
on_done()
287+
end
220288
end)
289+
-- Mark as "closing" while the Close frame write/teardown is in flight. The
290+
-- write callback transitions to "closed". Do not clobber the "closed" state
291+
-- set synchronously on the not-handshake-complete branch below.
292+
client.state = "closing"
221293
else
222294
client.state = "closed"
223295
client.tcp_handle:close()
296+
if on_done then
297+
vim.schedule(on_done)
298+
end
224299
end
225-
226-
client.state = "closing"
227300
end
228301

229302
---Check if a client connection is alive

lua/claudecode/server/frame.lua

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,15 @@ M.OPCODE = {
2222
---@field payload string Frame payload data
2323

2424
---Parse a WebSocket frame from binary data
25+
---
26+
---Failure is disambiguated via the optional third return value:
27+
--- * incomplete input ("need more bytes") returns `nil, 0` with NO third value
28+
--- * a fatal protocol violation returns `nil, 0, <close_code>` where the close
29+
--- code is the RFC 6455 status to send in the Close frame (1002/1007/1009)
2530
---@param data string The binary frame data
2631
---@return WebSocketFrame|nil frame The parsed frame, or nil if incomplete/invalid
2732
---@return number bytes_consumed Number of bytes consumed from input
33+
---@return number|nil close_code WebSocket close code for fatal protocol violations
2834
function M.parse_frame(data)
2935
if type(data) ~= "string" then
3036
return nil, 0
@@ -65,18 +71,18 @@ function M.parse_frame(data)
6571
}
6672

6773
if not valid_opcodes[opcode] then
68-
return nil, 0 -- Invalid opcode
74+
return nil, 0, 1002 -- Invalid opcode (protocol error)
6975
end
7076

7177
-- Check for reserved bits (must be 0)
7278
if rsv1 or rsv2 or rsv3 then
73-
return nil, 0 -- Protocol error
79+
return nil, 0, 1002 -- Reserved bits set (protocol error)
7480
end
7581

7682
-- Control frames must have fin=1 and payload ≤ 125 (RFC 6455 Section 5.5)
7783
if opcode >= M.OPCODE.CLOSE then
7884
if not fin or payload_len > 125 then
79-
return nil, 0 -- Protocol violation
85+
return nil, 0, 1002 -- Control frame fragmented or oversized (protocol error)
8086
end
8187
end
8288

@@ -103,13 +109,13 @@ function M.parse_frame(data)
103109

104110
-- Prevent extremely large payloads (DOS protection)
105111
if actual_payload_len > 100 * 1024 * 1024 then -- 100MB limit
106-
return nil, 0
112+
return nil, 0, 1009 -- Message too big
107113
end
108114
end
109115

110116
-- Additional payload length validation
111117
if actual_payload_len < 0 then
112-
return nil, 0 -- Invalid negative length
118+
return nil, 0, 1002 -- Invalid negative length (protocol error)
113119
end
114120

115121
-- Read mask if present
@@ -138,19 +144,19 @@ function M.parse_frame(data)
138144

139145
-- Validate text frame payload is valid UTF-8
140146
if opcode == M.OPCODE.TEXT and not utils.is_valid_utf8(payload) then
141-
return nil, 0 -- Invalid UTF-8 in text frame
147+
return nil, 0, 1007 -- Invalid UTF-8 in text frame (invalid payload data)
142148
end
143149

144150
-- Basic validation for close frame payload
145151
if opcode == M.OPCODE.CLOSE and actual_payload_len > 0 then
146152
if actual_payload_len == 1 then
147-
return nil, 0 -- Close frame with 1 byte payload is invalid
153+
return nil, 0, 1002 -- Close frame with 1 byte payload is invalid (protocol error)
148154
end
149155
-- Allow most close codes for compatibility, only validate UTF-8 for reason text
150156
if actual_payload_len > 2 then
151157
local reason = payload:sub(3)
152158
if not utils.is_valid_utf8(reason) then
153-
return nil, 0 -- Invalid UTF-8 in close reason
159+
return nil, 0, 1007 -- Invalid UTF-8 in close reason (invalid payload data)
154160
end
155161
end
156162
end

0 commit comments

Comments
 (0)