Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 14 additions & 24 deletions agent/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -969,30 +969,20 @@ static void handle_set_baud(const uint8_t *data, uint32_t len) {
/* Drain any garbage from baud rate transition */
while (uart_readable()) uart_getc();

/* Wait for host to confirm with any valid command within 3 seconds.
* If nothing arrives, revert to 115200 — the host may have failed
* to switch or the new baud rate doesn't work on this link. */
uint8_t pkt[MAX_PAYLOAD + 16];
uint32_t pkt_len = 0;
uint8_t cmd = proto_recv(pkt, &pkt_len, 3000);
if (cmd == 0) {
/* No valid command — revert */
uart_set_baud(115200);
while (uart_readable()) uart_getc();
at_default_baud = 1;
} else {
/* Got a valid command at new baud — confirmed working */
at_default_baud = (baud == 115200);
switch (cmd) {
case CMD_INFO: handle_info(); break;
case CMD_READ: handle_read(pkt, pkt_len); break;
case CMD_WRITE: handle_write(pkt, pkt_len); break;
case CMD_CRC32: handle_crc32_cmd(pkt, pkt_len); break;
case CMD_SCAN: handle_scan(pkt, pkt_len); break;
case CMD_MARK_BAD: handle_mark_bad(pkt, pkt_len); break;
default: proto_send_ack(ACK_OK); break;
}
}
/* Stay at the new baud unconditionally. Earlier versions waited up
* to 3 s for a verification packet and reverted to 115200 otherwise,
* but proto_recv's "3 s" deadline is a CPU-speed-dependent busy-wait
* (≈25-cycle loop × 100·timeout_ms iterations) — on a fast Cortex-A7
* the actual window collapses to <300 ms, which is shorter than the
* host-side WiFi-RTT for the rack pod's `POST /uart/baud` (≈1 s).
* The agent reverted before the host's verification packet could
* arrive at the new rate, leaving host/agent permanently mismatched
* and reading misclocked garbage.
*
* Failure mode if the host can't reach us at the new baud: agent is
* unrecoverable until the next power-cycle / fastboot, which the
* rack pod or RouterOS can both do trivially. */
at_default_baud = (baud == 115200);
}

int main(void) {
Expand Down
54 changes: 44 additions & 10 deletions src/defib/agent/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,18 +753,20 @@ async def set_baud(self, baud: int) -> bool:

Protocol: send SET_BAUD command, receive ACK at current baud,
then both sides switch. Verifies with INFO at new baud.
Falls back to original baud on failure.
Falls back to ``FALLBACK_BAUD`` on failure.

Routes through :meth:`Transport.set_baudrate` — pyserial
transports update their port; RFC 2217 sends SET-BAUDRATE; the
rack pod's :class:`RackTransport` POSTs to ``/uart/baud``.
Transports without out-of-band baud signalling raise
``NotImplementedError`` and we abort cleanly so the caller can
stay at ``FALLBACK_BAUD``.
"""
self._clear_rx_buffers()

import asyncio

port = getattr(self._transport, '_port', None)
if port is None:
logger.error("set_baud requires serial transport with _port")
return False

old_baud = port.baudrate
old_baud = self._current_baud
payload = struct.pack("<I", baud)
await send_packet(self._transport, CMD_SET_BAUD, payload)

Expand All @@ -775,22 +777,54 @@ async def set_baud(self, baud: int) -> bool:

# Agent has switched — now switch host side
await asyncio.sleep(0.05) # Brief pause for agent to complete switch
port.baudrate = baud
try:
await self._transport.set_baudrate(baud)
except NotImplementedError:
logger.error(
"set_baud: transport has no out-of-band baud control; "
"cannot sync host side. Wire mismatch — staying at %d.",
old_baud,
)
# Best-effort: nudge the agent back to fallback so we don't
# end up with a permanently mismatched link.
try:
fallback = struct.pack("<I", FALLBACK_BAUD)
await send_packet(self._transport, CMD_SET_BAUD, fallback)
except Exception:
pass
return False

# Verify communication at new baud
# Verify communication at new baud. Drain first — any bytes that
# were on the wire DURING the baud transition (e.g. the agent's
# post-ACK drain residue, or bridge UART RX bytes clocked at the
# wrong rate during the host→pod /uart/baud RTT) would be parsed
# as junk at the new rate and corrupt the next packet.
await self._transport.flush_input()
# Clear the async-leftover buffer the agent protocol parser keeps
# so any half-packet bytes left from the previous rate don't
# contaminate the verification read.
try:
from defib.agent.protocol import _async_leftover
_async_leftover.pop(id(self._transport), None)
except ImportError:
pass
await asyncio.sleep(0.05)
try:
await send_packet(self._transport, CMD_INFO)
cmd, data = await recv_response(self._transport, timeout=3.0)
if cmd == RSP_INFO:
logger.info("Baud rate switched to %d", baud)
self._current_baud = baud
return True
except Exception:
pass

# Failed — switch back
logger.warning("Verification at %d baud failed, reverting to %d", baud, old_baud)
port.baudrate = old_baud
try:
await self._transport.set_baudrate(old_baud)
except NotImplementedError:
pass
return False

async def mark_bad_block(self, block: int) -> bool:
Expand Down
14 changes: 14 additions & 0 deletions src/defib/transport/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,19 @@ async def unread(self, data: bytes) -> None:
"""
raise NotImplementedError("This transport does not support unread()")

async def set_baudrate(self, baud: int) -> None:
    """Change the UART baud rate on both ends of the link.

    Real serial transports set their pyserial ``baudrate`` property.
    RFC 2217 sends a SET-BAUDRATE sub-option to the remote bridge.
    Bridges that expose an out-of-band control channel (e.g. the
    rack pod's ``POST /uart/baud``) call into it.

    Plain TCP-bridged UARTs that have no signalling for baud rate
    changes raise ``NotImplementedError`` and the caller must keep
    the wire at ``115200``.

    This base implementation ignores ``baud`` and always raises
    ``NotImplementedError``; transports that can change the rate
    override it.
    """
    raise NotImplementedError("This transport does not support set_baudrate()")

async def close(self) -> None:
"""Close the transport. Default implementation does nothing."""
94 changes: 94 additions & 0 deletions src/defib/transport/rack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""TCP transport for rack pods, with out-of-band baud rate control.

A rack pod's TCP UART bridge passes bytes verbatim — no in-band signal
for the bridge to change its UART baud rate. The pod exposes a separate
HTTP control plane (``POST /uart/baud``) for that, so callers like
:class:`~defib.agent.client.FlashAgentClient.set_baud` can sync both
ends of the link when the on-device agent jumps to a faster rate.

``RackTransport`` extends :class:`~defib.transport.socket.SocketTransport`
with the HTTP base URL of the controlling pod and an
:meth:`set_baudrate` override that POSTs the new rate. URL scheme:

``rack://host[:bridge_port][?api=http_port]``

Defaults: ``bridge_port=9000``, ``api=8080`` (the pod's HTTP control port).
"""

from __future__ import annotations

import asyncio
import json
import logging
import socket as sock_mod
import urllib.error
import urllib.request

from defib.transport.base import TransportError
from defib.transport.socket import SocketTransport

logger = logging.getLogger(__name__)


class RackTransport(SocketTransport):
    """SocketTransport + HTTP control channel for the pod's /uart/baud.

    The TCP socket handed to the base class carries the UART byte stream;
    ``http_base`` is the root URL of the pod's HTTP API and is used only by
    :meth:`set_baudrate`.
    """

    def __init__(self, conn: sock_mod.socket, http_base: str) -> None:
        """Wrap an already-connected socket and remember the control URL.

        A trailing ``/`` on ``http_base`` is stripped so that endpoint paths
        can be appended without producing a double slash.
        """
        super().__init__(conn)
        self._http_base = http_base.rstrip("/")

    @classmethod
    async def create_rack(
        cls,
        host: str,
        bridge_port: int = 9000,
        http_port: int = 8080,
    ) -> RackTransport:
        """Connect to the pod's TCP bridge and build the transport.

        The socket is non-blocking with ``TCP_NODELAY`` set, and the HTTP
        control base URL is derived as ``http://{host}:{http_port}``.

        Raises:
            TransportError: if the socket cannot be created, configured or
                connected.
        """
        try:
            s = sock_mod.socket(sock_mod.AF_INET, sock_mod.SOCK_STREAM)
        except OSError as e:
            raise TransportError(
                f"Failed to connect to rack pod {host}:{bridge_port}: {e}"
            ) from e
        try:
            s.setblocking(False)
            s.setsockopt(sock_mod.IPPROTO_TCP, sock_mod.TCP_NODELAY, 1)
            # We are inside a coroutine, so a loop is guaranteed to be
            # running; get_running_loop() avoids get_event_loop()'s legacy
            # implicit-loop behaviour.
            loop = asyncio.get_running_loop()
            await loop.sock_connect(s, (host, bridge_port))
        except OSError as e:
            # Do not leak the descriptor when the connect attempt fails.
            s.close()
            raise TransportError(
                f"Failed to connect to rack pod {host}:{bridge_port}: {e}"
            ) from e
        except BaseException:
            # Cancellation (or any other unexpected error) must not leave a
            # half-open socket behind either.
            s.close()
            raise
        http_base = f"http://{host}:{http_port}"
        logger.info(
            "Connected to rack pod: tcp://%s:%d (control %s)",
            host, bridge_port, http_base,
        )
        return cls(s, http_base)

    async def set_baudrate(self, baud: int) -> None:
        """Sync the pod's UART side to ``baud`` via POST /uart/baud.

        The on-device agent flips to ``baud`` after its own CMD_SET_BAUD
        handler; we POST here to bring the bridge's UART side in line.
        Without this, the host writes at host-imagined ``baud`` but the
        bridge keeps clocking at 115200 — every byte gets mangled.

        The blocking HTTP request runs in a worker thread so the event loop
        is not stalled.

        Raises:
            TransportError: if the pod rejects the request or is unreachable.
        """
        url = f"{self._http_base}/uart/baud"
        body = json.dumps({"rate": int(baud)}).encode("ascii")
        logger.info("rack POST %s rate=%d", url, baud)
        await asyncio.to_thread(self._post_baud_sync, url, body)

    @staticmethod
    def _post_baud_sync(url: str, body: bytes) -> None:
        """Blocking helper: POST ``body`` as JSON to ``url``.

        The response body is read and discarded. HTTP error statuses and
        connection/timeout failures are both re-raised as ``TransportError``.
        """
        req = urllib.request.Request(
            url, data=body, method="POST",
            headers={"Content-Type": "application/json"},
        )
        try:
            with urllib.request.urlopen(req, timeout=5.0) as resp:
                resp.read()
        except urllib.error.HTTPError as e:
            # HTTPError must be handled before URLError (its base class) so
            # the status code and a snippet of the body reach the message.
            detail = e.read().decode("utf-8", "replace")[:200]
            raise TransportError(
                f"rack HTTP {e.code} on {url}: {detail}"
            ) from e
        except (urllib.error.URLError, TimeoutError, OSError) as e:
            raise TransportError(
                f"rack unreachable at {url}: {e}"
            ) from e
3 changes: 3 additions & 0 deletions src/defib/transport/serial.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ async def flush_input(self) -> None:
async def flush_output(self) -> None:
    """Reset the underlying port's output buffer via ``reset_output_buffer()``."""
    self._port.reset_output_buffer()

async def set_baudrate(self, baud: int) -> None:
    """Set the local port's ``baudrate`` attribute to ``baud``.

    Only the host-side port is touched here; nothing is sent over the wire.
    """
    self._port.baudrate = baud

async def bytes_waiting(self) -> int:
    """Return the port's ``in_waiting`` value, coerced to ``int``."""
    return int(self._port.in_waiting)

Expand Down
38 changes: 38 additions & 0 deletions src/defib/transport/serial_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,44 @@ async def create_transport(
logger.info("Using RFC 2217 transport: %s", device)
return await Rfc2217Transport.create(device, baudrate=baudrate)

# Rack pod: TCP UART bridge + HTTP control plane for baud sync.
# URL form: rack://host[:bridge_port][?api=http_port]. Defaults
# are 9000 / 8080. Differs from tcp:// only in that set_baudrate()
# POSTs to /uart/baud, so the on-device agent's set_baud rendezvous
# actually syncs both ends.
if device.startswith("rack://"):
from defib.transport.rack import RackTransport
endpoint = device[len("rack://"):]
# Optional ?api=NNN suffix
api_port = 8080
if "?" in endpoint:
endpoint, _, query = endpoint.partition("?")
for kv in query.split("&"):
if kv.startswith("api="):
try:
api_port = int(kv[len("api="):])
except ValueError as e:
raise TransportError(
f"rack:// api port is not a number: {kv!r}"
) from e
if ":" in endpoint:
host, _, bp = endpoint.partition(":")
try:
bridge_port = int(bp)
except ValueError as e:
raise TransportError(
f"rack:// bridge port is not a number: {bp!r}"
) from e
else:
host = endpoint
bridge_port = 9000
if not host:
raise TransportError(f"rack:// transport needs a host (got '{device}')")
logger.info(
"Using RackTransport: %s:%d (api :%d)", host, bridge_port, api_port,
)
return await RackTransport.create_rack(host, bridge_port, api_port)

platform = force_platform or sys.platform

if platform == "darwin":
Expand Down
Loading
Loading