Skip to content

Commit 01253cf

Browse files
widgetii and claude authored
tests: pin /fastboot wire format + recv-leftover stress (#90)
## Summary

Test-only PR — locks in two integration points that previously rested on real-hardware verification alone.

### 1. `/fastboot` binary blob (`tests/test_power_rack.py`)

`RackController.fastboot()` packs profile + SPL + agent into a single big-endian binary blob that the pod's C handler in `rack/firmware/main/http_api.c` parses field by field. Any drift between the two breaks bring-up silently — pod returns 400, the host can't tell why.

Six new `TestFastbootWireFormat` cases:

| Case | Why |
|---|---|
| `test_packs_expected_layout` | round-trip every field |
| `test_prestep1_passthrough` | optional prestep1 not dropped |
| `test_success_response_returned_verbatim` | done/elapsed/markers preserved |
| `test_pod_500_returns_json_body_not_exception` | protocol failure (500 + JSON) surfaces as a dict, not an exception — callers need `failed_phase` / `error` |
| `test_pod_unreachable_raises_power_controller_error` | network-layer errors stay raised |
| `test_realistic_blob_size_within_pod_limit` | 41 834 B for a typical hi3516ev300 upload, under the pod's 1 MiB cap |

Shared helper `_parse_fastboot_blob()` is the inverse of the host packer; if either side bumps the format, the round-trip test fails loudly.

### 2. Async `recv_packet` leftover stress (`tests/test_agent_protocol.py`)

PR #86 added a per-transport leftover buffer so multi-packet TCP chunks don't drop trailing packets. The existing tests covered the two simplest cases (two/three packets in one chunk).
Six stress cases added:

| Case | Why |
|---|---|
| `test_frame_split_across_two_reads_recombines` | half a packet, then the other half (TCP MTU split) |
| `test_large_stream_50_packets_in_one_chunk` | catches off-by-one in leftover slicing |
| `test_recv_response_skips_ready_in_leftover` | READY skipping still works when READYs are in leftover, not on the wire |
| `test_per_transport_isolation` | two transports must not share leftover state |
| `test_incomplete_frame_in_leftover_blocks_until_timeout` | half a frame must wait for the rest, never spuriously return partial |
| `test_corrupt_frame_skipped_then_recovers` | bit-flipped CRC mid-stream must be discarded; parser picks up the next valid frame |

## Test plan

- [ ] `uv run pytest tests/ -x -v --ignore=tests/fuzz` — **480 passed / 2 skipped** (was 468)
- [ ] `uv run ruff check tests/ src/defib/`
- [ ] `uv run mypy src/defib/ --ignore-missing-imports`

No production code touched.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Dmitry Ilyin <widgetii@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3f7f3b1 commit 01253cf

2 files changed

Lines changed: 341 additions & 0 deletions

File tree

tests/test_agent_protocol.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,141 @@ async def test_async_path_three_packets_one_chunk(self):
236236
assert seen == [RSP_READY, RSP_DATA, RSP_ACK]
237237

238238

239+
# ---------------------------------------------------------------------------
240+
# Tests: recv_packet async-leftover buffer stress
241+
#
242+
# These exercise the per-transport buffer that recv_packet keeps for
243+
# bytes that arrived after a frame's delimiter — a regression class
244+
# (PR #86) that's worth pinning down with stream-style scenarios:
245+
# split frames across reads, large multi-packet streams, READY
246+
# interleave, per-transport isolation, and timeout behaviour when
247+
# the buffer has incomplete frame data.
248+
# ---------------------------------------------------------------------------
249+
250+
class TestRecvPacketAsyncLeftoverStress:
    """Stream-style scenarios for the bytes ``recv_packet`` keeps between calls."""

    @pytest.mark.asyncio
    async def test_frame_split_across_two_reads_recombines(self) -> None:
        """One frame delivered as two separate reads (e.g. a TCP packet
        split at the MTU) must be accumulated up to its delimiter and
        then parsed as a single packet."""
        from defib.transport.mock import MockTransport

        payload = b"X" * 32
        frame = make_device_packet(RSP_INFO, payload)
        # Cut somewhere in the middle of the frame; the exact spot is arbitrary.
        midpoint = len(frame) // 2

        transport = MockTransport(flush_clears_buffer=False)
        for piece in (frame[:midpoint], frame[midpoint:]):
            transport.enqueue_rx(piece)

        cmd, data = await recv_packet(transport, timeout=1.0)
        assert (cmd, data) == (RSP_INFO, payload)

    @pytest.mark.asyncio
    async def test_large_stream_50_packets_in_one_chunk(self) -> None:
        """Fifty small packets delivered as one chunk must all be
        returned, in order — guards against off-by-one errors when the
        leftover bytes are sliced."""
        from defib.transport.mock import MockTransport

        packet_count = 50
        bodies = [
            b"\x00\x00" + bytes([seq, 0xAA, 0x55]) for seq in range(packet_count)
        ]
        frames = [make_device_packet(RSP_DATA, body) for body in bodies]

        transport = MockTransport(flush_clears_buffer=False)
        transport.enqueue_rx(b"".join(frames))

        for i, expected in enumerate(bodies):
            cmd, data = await recv_packet(transport, timeout=1.0)
            assert cmd == RSP_DATA
            assert data == expected, f"packet {i} mismatch"

    @pytest.mark.asyncio
    async def test_recv_response_skips_ready_in_leftover(self) -> None:
        """``recv_response`` must still skip READY packets when they and
        the real response were coalesced into one chunk and are served
        from leftover rather than from the wire."""
        from defib.transport.mock import MockTransport

        ready = make_device_packet(RSP_READY, b"DEFIB")
        info = make_device_packet(RSP_INFO, b"PAYLOAD!")

        transport = MockTransport(flush_clears_buffer=False)
        # Two READYs, the real response, then one more READY left queued.
        transport.enqueue_rx(b"".join((ready, ready, info, ready)))

        cmd, data = await recv_response(transport, timeout=1.0)
        assert (cmd, data) == (RSP_INFO, b"PAYLOAD!")

        # The READY that followed the response must survive
        # recv_response's internal recv_packet calls and parse next.
        trailing_cmd, _ = await recv_packet(transport, timeout=1.0)
        assert trailing_cmd == RSP_READY

    @pytest.mark.asyncio
    async def test_per_transport_isolation(self) -> None:
        """Leftover state belongs to one transport only; otherwise bytes
        from one socket would show up on another's read."""
        from defib.transport.mock import MockTransport

        frame_a = make_device_packet(RSP_INFO, b"AAA")
        frame_b = make_device_packet(RSP_DATA, b"\x00\x00BBB")
        first = MockTransport(flush_clears_buffer=False)
        second = MockTransport(flush_clears_buffer=False)
        # Two complete packets each, so both transports end up with leftover.
        first.enqueue_rx(frame_a + frame_a)
        second.enqueue_rx(frame_b + frame_b)

        # Alternate between the transports while reading.
        seen = []
        for transport in (first, second, first, second):
            cmd, _ = await recv_packet(transport, timeout=1.0)
            seen.append(cmd)

        assert seen == [RSP_INFO, RSP_DATA, RSP_INFO, RSP_DATA]

    @pytest.mark.asyncio
    async def test_incomplete_frame_in_leftover_blocks_until_timeout(self) -> None:
        """When only PART of a frame has arrived (no delimiter yet) the
        call must keep waiting and end in a clean timeout — a partial
        frame must never be returned."""
        from defib.transport.mock import MockTransport

        frame = make_device_packet(RSP_DATA, b"\x00\x00" + b"Z" * 16)
        # First half only, so the delimiter never arrives.
        first_half = frame[: len(frame) // 2]

        transport = MockTransport(flush_clears_buffer=False)
        transport.enqueue_rx(first_half)

        with pytest.raises(TransportTimeout):
            await recv_packet(transport, timeout=0.2)

    @pytest.mark.asyncio
    async def test_corrupt_frame_skipped_then_recovers(self) -> None:
        """A frame failing its CRC in the middle of a stream must be
        dropped, and parsing must resume with the next valid frame."""
        from defib.transport.mock import MockTransport

        damaged = bytearray(make_device_packet(RSP_DATA, b"\x00\x00" + b"BAD!"))
        # Alter one byte inside the frame so its CRC32 no longer matches.
        damaged[4] ^= 0x42
        intact = make_device_packet(RSP_INFO, b"GOOD")
        ack = make_device_packet(RSP_ACK, bytes([ACK_OK]))

        transport = MockTransport(flush_clears_buffer=False)
        transport.enqueue_rx(bytes(damaged) + intact + ack)

        # Call 1: the damaged frame is discarded, the GOOD packet is returned.
        cmd, data = await recv_packet(transport, timeout=1.0)
        assert (cmd, data) == (RSP_INFO, b"GOOD")

        # Call 2: the ACK queued behind it is served from leftover.
        next_cmd, _ = await recv_packet(transport, timeout=1.0)
        assert next_cmd == RSP_ACK
372+
373+
239374
# ---------------------------------------------------------------------------
240375
# Tests: send_packet
241376
# ---------------------------------------------------------------------------

tests/test_power_rack.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,209 @@ def raise_oserr(req: Any, timeout: float | None = None) -> None:
197197

198198
def _explode(*args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
    """Fail immediately, whatever the arguments.

    Meant to be installed where ``urlopen`` must never run, so an
    unexpected network call shows up as a test failure.
    """
    message = "urlopen must not be called in this test"
    raise AssertionError(message)
200+
201+
202+
# ---------------------------------------------------------------------------
203+
# fastboot() — binary blob wire format, success/failure shapes
204+
# ---------------------------------------------------------------------------
205+
206+
def _parse_fastboot_blob(blob: bytes) -> dict[str, object]:
    """Decode the binary blob the pod's /fastboot endpoint expects, so
    tests can assert on what the host packed.

    Layout (every integer is big-endian)::

        u32 spl_address, u32 ddr_step_address, u32 uboot_address,
        u16 length + prestep0, u16 length + ddrstep0, u16 length + prestep1,
        u32 length + spl, u32 length + agent

    Args:
        blob: The raw request body to decode.

    Returns:
        A dict with keys ``spl_address``, ``ddr_step_address``,
        ``uboot_address`` (ints) and ``prestep0``, ``ddrstep0``,
        ``prestep1``, ``spl``, ``agent`` (bytes).

    Raises:
        AssertionError: If the blob ends before a field is complete, or
            if bytes remain after the last field.
    """
    pos = 0

    def take(count: int, field: str) -> bytes:
        """Consume exactly ``count`` bytes or fail naming the short field."""
        nonlocal pos
        available = len(blob) - pos
        # Slicing past the end would silently yield fewer bytes and decode
        # to a wrong value, so bounds are checked before every read.
        if count > available:
            raise AssertionError(
                f"blob truncated in {field}: need {count} bytes at offset "
                f"{pos}, only {available} available"
            )
        chunk = blob[pos:pos + count]
        pos += count
        return chunk

    def uint(width: int, field: str) -> int:
        """Read a big-endian unsigned integer of ``width`` bytes."""
        return int.from_bytes(take(width, field), "big")

    def sized(length_width: int, field: str) -> bytes:
        """Read a length prefix of ``length_width`` bytes, then that many bytes."""
        return take(uint(length_width, f"{field} length"), field)

    # Dict displays evaluate their values in source order, so the reads
    # below happen in wire order.
    parsed: dict[str, object] = {
        "spl_address": uint(4, "spl_address"),
        "ddr_step_address": uint(4, "ddr_step_address"),
        "uboot_address": uint(4, "uboot_address"),
        "prestep0": sized(2, "prestep0"),
        "ddrstep0": sized(2, "ddrstep0"),
        "prestep1": sized(2, "prestep1"),
        "spl": sized(4, "spl"),
        "agent": sized(4, "agent"),
    }
    if pos != len(blob):
        raise AssertionError(
            f"trailing bytes ({len(blob) - pos}) past parsed fields"
        )
    return parsed
248+
249+
250+
class TestFastbootWireFormat:
    """Pin the binary request body sent to the pod's /fastboot endpoint.

    Per this suite's notes, the C side in rack/firmware/main/http_api.c
    reads, all big-endian:
        [u32 spl_address][u32 ddr_step_address][u32 uboot_address]
        [u16 prestep0_len][prestep0][u16 ddrstep0_len][ddrstep0]
        [u16 prestep1_len][prestep1][u32 spl_len][spl][u32 agent_len][agent]
    These tests make a host/pod layout mismatch fail loudly.
    """

    @pytest.mark.asyncio
    async def test_packs_expected_layout(
        self, monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """Every field handed to fastboot() must come back out of the blob."""
        pod_reply = b'{"success":true,"last_phase":"done","elapsed_ms":4521}'
        controller = RackController(host="pod", port=8080)

        with patched_urlopen(monkeypatch, body=pod_reply) as rec:
            await controller.fastboot(
                spl_address=0x04010500,
                ddr_step_address=0x04013000,
                uboot_address=0x41000000,
                prestep0=b"\x01\x02\x03\x04",
                ddrstep0=b"\x05\x06",
                prestep1=None,
                spl=b"S" * 256,
                agent=b"A" * 128,
            )

        assert len(rec.calls) == 1
        method, url, data = rec.calls[0]
        assert (method, url) == ("POST", "http://pod:8080/fastboot")
        assert data is not None
        assert _parse_fastboot_blob(data) == {
            "spl_address": 0x04010500,
            "ddr_step_address": 0x04013000,
            "uboot_address": 0x41000000,
            "prestep0": b"\x01\x02\x03\x04",
            "ddrstep0": b"\x05\x06",
            "prestep1": b"",  # None is packed as an empty field
            "spl": b"S" * 256,
            "agent": b"A" * 128,
        }

    @pytest.mark.asyncio
    async def test_prestep1_passthrough(
        self, monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """A supplied prestep1 must reach the blob unchanged."""
        expected = bytes(range(64))
        controller = RackController(host="pod", port=8080)

        with patched_urlopen(monkeypatch) as rec:
            await controller.fastboot(
                spl_address=0, ddr_step_address=0, uboot_address=0,
                prestep0=b"", ddrstep0=b"", prestep1=expected,
                spl=b"", agent=b"",
            )

        sent_blob = rec.calls[0][2]
        assert _parse_fastboot_blob(sent_blob)["prestep1"] == expected

    @pytest.mark.asyncio
    async def test_success_response_returned_verbatim(
        self, monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """The pod's JSON success body must come back as the same dict."""
        pod_reply = (
            b'{"success":true,"last_phase":"done",'
            b'"elapsed_ms":4521,"handshake_markers":7}'
        )
        expected = {
            "success": True,
            "last_phase": "done",
            "elapsed_ms": 4521,
            "handshake_markers": 7,
        }
        controller = RackController(host="pod", port=8080)

        with patched_urlopen(monkeypatch, body=pod_reply):
            result = await controller.fastboot(
                spl_address=0, ddr_step_address=0, uboot_address=0,
                prestep0=b"", ddrstep0=b"", prestep1=None,
                spl=b"", agent=b"",
            )

        assert result == expected

    @pytest.mark.asyncio
    async def test_pod_500_returns_json_body_not_exception(
        self, monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """A protocol failure arrives as HTTP 500 with a JSON body. The
        host must hand that JSON back (callers read failed_phase /
        error) instead of raising; an HTTPError is meant to surface
        ONLY when the response is not JSON."""
        import urllib.error

        failure_json = (
            b'{"success":false,"last_phase":"prestep0",'
            b'"failed_phase":"prestep0","error":"PRESTEP0 HEAD",'
            b'"elapsed_ms":214,"handshake_markers":5}'
        )

        def respond_500(req: Any, timeout: float | None = None) -> None:
            raise urllib.error.HTTPError(
                url=req.full_url, code=500, msg="Internal Server Error",
                hdrs=None,  # type: ignore[arg-type]
                fp=io.BytesIO(failure_json),
            )

        monkeypatch.setattr(rack_mod.urllib.request, "urlopen", respond_500)
        controller = RackController(host="pod", port=8080)
        result = await controller.fastboot(
            spl_address=0, ddr_step_address=0, uboot_address=0,
            prestep0=b"", ddrstep0=b"", prestep1=None,
            spl=b"", agent=b"",
        )

        assert result["success"] is False
        assert result["failed_phase"] == "prestep0"
        assert "PRESTEP0" in str(result["error"])

    @pytest.mark.asyncio
    async def test_pod_unreachable_raises_power_controller_error(
        self, monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """A network-level failure must still be raised to the caller."""
        import urllib.error

        def refuse(req: Any, timeout: float | None = None) -> None:
            raise urllib.error.URLError("no route to host")

        monkeypatch.setattr(rack_mod.urllib.request, "urlopen", refuse)
        controller = RackController(host="pod", port=8080)

        with pytest.raises(PowerControllerError, match="rack unreachable"):
            await controller.fastboot(
                spl_address=0, ddr_step_address=0, uboot_address=0,
                prestep0=b"", ddrstep0=b"", prestep1=None,
                spl=b"", agent=b"",
            )

    @pytest.mark.asyncio
    async def test_realistic_blob_size_within_pod_limit(
        self, monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """The pod caps the body at 1 MiB (FASTBOOT_MAX_BODY). A typical
        upload is profile (~140 B) + SPL (~24 KB) + agent (~17 KB),
        about 41 KB; check that the packed size lands there."""
        prestep_bytes = b"\xab" * 64
        ddr_bytes = b"\xcd" * 64
        spl_bytes = b"\x90" * 24_576
        agent_bytes = b"\x55" * 17_104
        controller = RackController(host="pod", port=8080)

        with patched_urlopen(monkeypatch) as rec:
            await controller.fastboot(
                spl_address=0x04010500,
                ddr_step_address=0x04013000,
                uboot_address=0x41000000,
                prestep0=prestep_bytes, ddrstep0=ddr_bytes, prestep1=None,
                spl=spl_bytes, agent=agent_bytes,
            )

        # Addresses 3*4 = 12, u16 length prefixes 3*2 = 6, prestep0 +
        # ddrstep0 = 128, empty prestep1 = 0, then 4 + 24576 for the SPL
        # and 4 + 17104 for the agent: 41834 bytes in total.
        sent_size = len(rec.calls[0][2])
        assert sent_size == 41834
        assert sent_size < 1024 * 1024  # < FASTBOOT_MAX_BODY

0 commit comments

Comments
 (0)