Skip to content

Commit 7831f29

Browse files
authored
fix(firmware): phantom LD2410 detection + ENOMEM backoff (ruvnet#1135) (ruvnet#1159)
Bug #2 (root cause): LD2410 probe-detection matched only the 4-byte head 0xF4F3F2F1, so a floating UART at 256000 baud could phantom-detect a sensor and spawn a UART task. Now requires a full validated report frame (head + sane length + tail 0xF8F7F6F5), extracted to mmwave_detect.h and shared with a host unit test (test_mmwave_detect.c, 8 vectors) so firmware and test can't diverge. Matches the validate-before-trust approach used for MR60 in ruvnet#1107. Bug #1: sendto ENOMEM used a fixed 100 ms backoff too short to drain sustained lwIP/WiFi buffer pressure, so a node could stay stuck. Now exponential (100->200->...->2000 ms per consecutive ENOMEM, reset on first successful send). Removing the phantom LD2410 task (bug #2) also removes the extra load that tipped the reporter's tier-2 node into the stuck state. Validated on ESP32-S3 QFN56 rev v0.2 (the reporter's silicon): tier-2 streams ~100 frames/s with no stuck ENOMEM and correctly reports no mmWave (no phantom). LD2410 predicate truth table proven (head-without-tail REJECTED). Could not reproduce the reporter's environment-specific floating-pin noise, so the deterministic proof is the host unit test.
1 parent 4bf88e1 commit 7831f29

5 files changed

Lines changed: 159 additions & 13 deletions

File tree

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* @file mmwave_detect.h
3+
* @brief Pure (host-testable) mmWave frame-validation predicates for probe-time
4+
* sensor detection. No ESP-IDF deps — safe to #include in a host unit test.
5+
*
6+
* Detection must validate a *full* frame, never a bare header byte/pattern: a
7+
* floating UART with no sensor reads line noise that can contain header-looking
8+
* bytes, which the old loose checks mistook for a real sensor (#1107 MR60,
9+
* #1135 LD2410). These predicates are the validate-before-trust gate.
10+
*/
11+
#ifndef MMWAVE_DETECT_H
12+
#define MMWAVE_DETECT_H
13+
14+
#include <stdint.h>
15+
#include <stdbool.h>
16+
17+
/**
 * Test whether a *validated* LD2410 report frame starts at buf[i].
 *
 * Frame layout (flen is the 2-byte little-endian length field):
 *     F4 F3 F2 F1 | flen | data[flen] | F8 F7 F6 F5
 *
 * All of the following must hold, entirely inside [0, len):
 *   - the head magic F4 F3 F2 F1 at offset i,
 *   - a sane length field, 1 <= flen <= 64,
 *   - the tail magic F8 F7 F6 F5 at offset i + 6 + flen.
 * Pure noise that merely contains 0xF4F3F2F1 fails the length or tail check.
 *
 * @param buf  Bytes read from the UART; a null pointer yields false.
 * @param i    Candidate frame start offset; negative or out-of-range yields false.
 * @param len  Number of valid bytes in buf; zero or negative yields false.
 * @return true only when a complete head + length + data + tail frame fits in
 *         buf[i .. len-1]; false otherwise.
 */
static inline bool mmwave_ld2410_valid_at(const uint8_t *buf, int i, int len)
{
    /* Head (4 bytes) + length field (2 bytes) must fit. The check is written
     * as a subtraction: with i >= 0 and len > 0, len - i cannot overflow,
     * whereas the addition i + 5 is undefined for i near INT_MAX. */
    if (!buf || i < 0 || len <= 0 || len - i < 6) {
        return false;
    }

    if (buf[i] != 0xF4 || buf[i + 1] != 0xF3 ||
        buf[i + 2] != 0xF2 || buf[i + 3] != 0xF1) {
        return false;
    }

    uint16_t flen = (uint16_t)((uint16_t)buf[i + 4] | ((uint16_t)buf[i + 5] << 8));

    /* Real LD2410 report frames are small (basic=13, engineering=35). */
    if (flen < 1 || flen > 64) {
        return false;
    }

    /* Bytes available after head + length field must hold data[flen] plus the
     * 4-byte tail. Checked before computing the tail offset so that offset is
     * known to be in range (and cannot overflow) when it is formed. */
    if (len - i - 6 < (int)flen + 4) {
        return false;
    }

    int tail = i + 6 + (int)flen;
    return buf[tail] == 0xF8 && buf[tail + 1] == 0xF7
        && buf[tail + 2] == 0xF6 && buf[tail + 3] == 0xF5;
}
36+
37+
#endif /* MMWAVE_DETECT_H */

firmware/esp32-csi-node/main/mmwave_sensor.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
*/
2727

2828
#include "mmwave_sensor.h"
29+
#include "mmwave_detect.h"
2930

3031
#include <string.h>
3132
#include <math.h>
@@ -401,10 +402,12 @@ static mmwave_type_t probe_at_baud(uint32_t baud)
401402
}
402403
}
403404
}
404-
/* LD2410: 4-byte header 0xF4F3F2F1 (already specific enough). */
405-
if (i + 3 < len && buf[i] == 0xF4 && buf[i+1] == 0xF3
406-
&& buf[i+2] == 0xF2 && buf[i+3] == 0xF1
407-
&& baud == MMWAVE_LD2410_BAUD) {
405+
/* LD2410: require a *full validated* report frame, not just the
406+
* 4-byte head. A floating UART1 at 256000 baud can emit the head
407+
* pattern 0xF4F3F2F1 from line noise (#1135 bug #2). The shared
408+
* predicate (host-unit-tested in mmwave_detect.h) demands a sane
409+
* intra-frame length AND the matching tail 0xF8F7F6F5. */
410+
if (baud == MMWAVE_LD2410_BAUD && mmwave_ld2410_valid_at(buf, i, len)) {
408411
ld2410_header_seen++;
409412
}
410413
}

firmware/esp32-csi-node/main/stream_sender.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,16 @@ static struct sockaddr_in s_dest_addr;
2626
* rapid-fire CSI callbacks can exhaust the pbuf pool and crash the device.
2727
*/
2828
static int64_t s_backoff_until_us = 0; /* esp_timer timestamp to resume */
29-
#define ENOMEM_COOLDOWN_MS 100 /* suppress sends for 100 ms */
29+
#define ENOMEM_COOLDOWN_MS 100 /* base backoff; doubles per streak */
30+
#define ENOMEM_COOLDOWN_MAX_MS 2000 /* cap on the exponential backoff */
3031
#define ENOMEM_LOG_INTERVAL 50 /* log every Nth suppressed send */
3132
static uint32_t s_enomem_suppressed = 0;
33+
/* Consecutive ENOMEM episodes without an intervening successful send. A fixed
34+
* 100 ms backoff is too short to drain sustained lwIP/WiFi buffer pressure
35+
* (#1135 bug #1: tier-2 + concurrent TX keeps the node stuck), so the backoff
36+
* grows 100→200→400→…→2000 ms per streak and resets on the first send that
37+
* succeeds. */
38+
static uint32_t s_enomem_streak = 0;
3239

3340
static int sender_init_internal(const char *ip, uint16_t port)
3441
{
@@ -93,16 +100,24 @@ int stream_sender_send(const uint8_t *data, size_t len)
93100
(struct sockaddr *)&s_dest_addr, sizeof(s_dest_addr));
94101
if (sent < 0) {
95102
if (errno == ENOMEM) {
96-
/* Start backoff to let lwIP reclaim buffers */
97-
s_backoff_until_us = esp_timer_get_time() +
98-
(int64_t)ENOMEM_COOLDOWN_MS * 1000;
99-
ESP_LOGW(TAG, "sendto ENOMEM — backing off for %d ms", ENOMEM_COOLDOWN_MS);
103+
/* Exponential backoff: double the cooldown each consecutive ENOMEM
104+
* (capped) so sustained buffer pressure actually drains instead of
105+
* the node re-failing every 100 ms forever (#1135 bug #1). */
106+
uint32_t shift = s_enomem_streak < 5 ? s_enomem_streak : 5;
107+
uint32_t cooldown = ENOMEM_COOLDOWN_MS << shift;
108+
if (cooldown > ENOMEM_COOLDOWN_MAX_MS) cooldown = ENOMEM_COOLDOWN_MAX_MS;
109+
s_enomem_streak++;
110+
s_backoff_until_us = esp_timer_get_time() + (int64_t)cooldown * 1000;
111+
ESP_LOGW(TAG, "sendto ENOMEM — backing off for %lu ms (streak %lu)",
112+
(unsigned long)cooldown, (unsigned long)s_enomem_streak);
100113
} else {
101114
ESP_LOGW(TAG, "sendto failed: errno %d", errno);
102115
}
103116
return -1;
104117
}
105118

119+
/* A send got through — buffer pressure cleared; reset the backoff streak. */
120+
s_enomem_streak = 0;
106121
return sent;
107122
}
108123

firmware/esp32-csi-node/test/Makefile

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ FUZZ_DURATION ?= 30
4444
FUZZ_JOBS ?= 1
4545

4646
.PHONY: all clean run_serialize run_edge run_nvs run_all test_adr110 run_adr110 \
47-
test_vitals run_vitals host_tests
47+
test_vitals run_vitals test_mmwave_detect run_mmwave_detect host_tests
4848

49-
all: fuzz_serialize fuzz_edge fuzz_nvs test_adr110 test_vitals
49+
all: fuzz_serialize fuzz_edge fuzz_nvs test_adr110 test_vitals test_mmwave_detect
5050

5151
# --- ADR-110 encoding unit tests ---
5252
# Host-side, no libFuzzer needed — plain C99 deterministic table tests
@@ -69,8 +69,19 @@ test_vitals: test_vitals_count_presence.c $(MAIN_DIR)/edge_processing.h
6969
run_vitals: test_vitals
7070
./test_vitals
7171

72-
host_tests: run_adr110 run_vitals
73-
@echo "Host tests passed (ADR-110 + vitals #998/#996)"
72+
# --- mmWave LD2410 detection predicate (#1135 bug #2) ---
73+
# Host-side, no libFuzzer. Proves a floating-UART head pattern (0xF4F3F2F1)
74+
# without a valid frame length+tail is REJECTED, so a phantom LD2410 is never
75+
# detected on a node with no sensor wired. Tests the real predicate the
76+
# firmware uses (../main/mmwave_detect.h) — test and firmware can't disagree.
77+
test_mmwave_detect: test_mmwave_detect.c $(MAIN_DIR)/mmwave_detect.h
78+
cc -std=c99 -Wall -Wextra -I$(MAIN_DIR) -o $@ $<
79+
80+
run_mmwave_detect: test_mmwave_detect
81+
./test_mmwave_detect
82+
83+
host_tests: run_adr110 run_vitals run_mmwave_detect
84+
@echo "Host tests passed (ADR-110 + vitals #998/#996 + mmwave detect #1135)"
7485

7586
# --- Serialize fuzzer ---
7687
# Tests csi_serialize_frame() with random wifi_csi_info_t inputs.
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* @file test_mmwave_detect.c
3+
* @brief Host-side unit tests for the LD2410 frame-validation predicate (#1135).
4+
*
5+
* Proves the phantom-detection fix: a floating UART can emit the 4-byte head
6+
* 0xF4F3F2F1, but the predicate rejects it unless a sane length + matching tail
7+
* 0xF8F7F6F5 are also present. Tests the REAL predicate from mmwave_detect.h
8+
* (the same code the firmware's probe_at_baud calls).
9+
*
10+
* cc -std=c99 -Wall -Wextra -I../main -o test_mmwave_detect test_mmwave_detect.c && ./test_mmwave_detect
11+
*
12+
* Exits 0 on all-pass; prints the failing case otherwise.
13+
*/
14+
#include <stdint.h>
15+
#include <stdio.h>
16+
#include <string.h>
17+
#include "mmwave_detect.h"
18+
19+
static int failures = 0;
20+
#define CHECK(cond, msg) do { \
21+
if (!(cond)) { printf("FAIL: %s\n", msg); failures++; } \
22+
else { printf("ok: %s\n", msg); } \
23+
} while (0)
24+
25+
/*
 * Write one well-formed LD2410 report frame into out and return its size.
 *
 * Layout written: F4F3F2F1 | dlen (little-endian, 2 bytes) | data[dlen] | F8F7F6F5
 * Data byte k is (0xAA ^ k) truncated to 8 bits. The caller must supply a
 * buffer of at least dlen + 10 bytes; no bounds are checked here.
 */
static int make_frame(uint8_t *out, uint16_t dlen)
{
    static const uint8_t head_magic[4] = { 0xF4, 0xF3, 0xF2, 0xF1 };
    static const uint8_t tail_magic[4] = { 0xF8, 0xF7, 0xF6, 0xF5 };
    uint8_t *cursor = out;

    for (int b = 0; b < 4; b++) {
        *cursor++ = head_magic[b];
    }

    /* Length field, low byte first. */
    *cursor++ = (uint8_t)(dlen & 0xFF);
    *cursor++ = (uint8_t)(dlen >> 8);

    for (uint16_t idx = 0; idx < dlen; idx++) {
        *cursor++ = (uint8_t)(0xAA ^ idx);
    }

    for (int b = 0; b < 4; b++) {
        *cursor++ = tail_magic[b];
    }

    return (int)(cursor - out);
}
35+
36+
int main(void)
37+
{
38+
uint8_t buf[256];
39+
40+
/* 1. A real basic-report frame (data len 13) validates. */
41+
int n = make_frame(buf, 13);
42+
CHECK(mmwave_ld2410_valid_at(buf, 0, n), "valid basic frame (len=13) accepted");
43+
44+
/* 2. A real engineering-report frame (data len 35) validates. */
45+
n = make_frame(buf, 35);
46+
CHECK(mmwave_ld2410_valid_at(buf, 0, n), "valid engineering frame (len=35) accepted");
47+
48+
/* 3. Head magic present but NO valid tail — the #1135 phantom case. */
49+
memset(buf, 0x00, sizeof(buf));
50+
buf[0]=0xF4; buf[1]=0xF3; buf[2]=0xF2; buf[3]=0xF1; buf[4]=13; buf[5]=0;
51+
/* data present but tail is zeros, not F8F7F6F5 */
52+
CHECK(!mmwave_ld2410_valid_at(buf, 0, 64), "head magic without valid tail REJECTED (#1135)");
53+
54+
/* 4. Head magic with insane length is rejected. */
55+
memset(buf, 0xFF, sizeof(buf));
56+
buf[0]=0xF4; buf[1]=0xF3; buf[2]=0xF2; buf[3]=0xF1; buf[4]=0xFF; buf[5]=0xFF; /* len=65535 */
57+
CHECK(!mmwave_ld2410_valid_at(buf, 0, 200), "head magic with oversized length REJECTED");
58+
59+
/* 5. Pure noise (no head) is rejected. */
60+
for (int k = 0; k < 64; k++) buf[k] = (uint8_t)(0x5A + k);
61+
CHECK(!mmwave_ld2410_valid_at(buf, 0, 64), "non-header noise REJECTED");
62+
63+
/* 6. Truncated frame (tail would run past the buffer) is rejected. */
64+
n = make_frame(buf, 13);
65+
CHECK(!mmwave_ld2410_valid_at(buf, 0, n - 2), "truncated frame (tail past buffer) REJECTED");
66+
67+
/* 7. Valid frame at a non-zero offset still validates. */
68+
memset(buf, 0x00, sizeof(buf));
69+
n = make_frame(buf + 7, 13);
70+
CHECK(mmwave_ld2410_valid_at(buf, 7, 7 + n), "valid frame at offset 7 accepted");
71+
72+
/* 8. Repeated head bytes without a frame (worst-case noise) rejected. */
73+
for (int k = 0; k + 3 < 64; k += 4) {
74+
buf[k]=0xF4; buf[k+1]=0xF3; buf[k+2]=0xF2; buf[k+3]=0xF1;
75+
}
76+
CHECK(!mmwave_ld2410_valid_at(buf, 0, 64), "repeated bare head bytes REJECTED");
77+
78+
printf("\n%s (%d failures)\n", failures ? "FAILED" : "ALL PASS", failures);
79+
return failures ? 1 : 0;
80+
}

0 commit comments

Comments
 (0)