Skip to content

Commit 61f4854

Browse files
committed
ath12k: fix throughput collapse on 5G<->6G roaming (EAP105/QCN9274)
QCN9274 firmware (WLAN.WBE.1.4.1) fails to reset its internal BA/TX aggregation state when the same STA MAC re-appears on the other radio (5G vs. 6G hardware MAC) after a cross-band roam. The PHY rate stays high, but actual throughput collapses to 0-5 Mbps due to stale BA window tracking. Fixes: WIFI-15461 Signed-off-by: Tanya Singh <tanya_singh@accton.com>
1 parent 8eb9e88 commit 61f4854

4 files changed

Lines changed: 297 additions & 1 deletion

File tree

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2+
From: Tanya Singh <tanya_singh@accton.com>
3+
Date: Mon, 26 May 2026 00:00:00 +0800
4+
Subject: [PATCH] ath12k: force fresh TID setup on ampdu_start to fix BA after roam
5+
6+
When a STA roams between bands (5G<->6G) on the same AP, the firmware's
7+
internal BA session state can become stale. The UPDATE path only updates
8+
the REO queue descriptor but the firmware doesn't reset its BA tracking.
9+
10+
Force a complete TID teardown when ampdu_start fires on an already-active
11+
TID with ba_win > 1, then let it go through fresh allocation. This resets
12+
the REO LUT entry and clears the hardware cache.
13+
14+
Signed-off-by: Tanya Singh <tanya_singh@accton.com>
15+
---
16+
drivers/net/wireless/ath/ath12k/dp_rx.c | 17 +++++++++++++++++
17+
1 file changed, 17 insertions(+)
18+
19+
--- a/drivers/net/wireless/ath/ath12k/dp_rx.c
20+
+++ b/drivers/net/wireless/ath/ath12k/dp_rx.c
21+
@@ -1149,6 +1149,23 @@
22+
23+
rx_tid = &peer->rx_tid[tid];
24+
/* Update the tid queue if it is already setup */
25+
+ /*
26+
+ * Force fresh REO queue on real AMPDU start (ba_win > 1) if TID is
27+
+ * already active. Works around FW not resetting BA state on cross-band roam.
28+
+ */
29+
+ if (rx_tid->active && ba_win_sz > 1) {
30+
+ dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
31+
+ DMA_BIDIRECTIONAL);
32+
+ kfree(rx_tid->vaddr);
33+
+ rx_tid->vaddr = NULL;
34+
+ if (peer->mlo)
35+
+ ath12k_peer_rx_tid_qref_reset(ab, peer->ml_peer_id, tid);
36+
+ else
37+
+ ath12k_peer_rx_tid_qref_reset(ab, peer->peer_id, tid);
38+
+ ath12k_hal_reo_shared_qaddr_cache_clear(ab);
39+
+ rx_tid->active = false;
40+
+ }
41+
+ /* end force-fresh workaround */
42+
if (rx_tid->active) {
43+
paddr = rx_tid->paddr;
44+
ret = ath12k_peer_rx_tid_reo_update(ab, peer, rx_tid,

feeds/ucentral/ucentral-event/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@ define Build/Compile
1919
endef
2020

2121
define Package/ucentral-event/install
22-
$(INSTALL_DIR) $(1)/usr/sbin $(1)/etc/init.d $(1)/etc/config
22+
$(INSTALL_DIR) $(1)/usr/sbin $(1)/usr/bin $(1)/etc/init.d $(1)/etc/config
2323
$(INSTALL_BIN) ./files/ucentral-event $(1)/usr/sbin/
2424
$(INSTALL_BIN) ./files/ucentral-wifiscan $(1)/usr/sbin/
25+
$(INSTALL_BIN) ./files/ba-recovery $(1)/usr/bin/
2526
$(INSTALL_BIN) ./files/ucentral-event.init $(1)/etc/init.d/ucentral-event
2627
$(INSTALL_BIN) ./files/ucentral-wifiscan.init $(1)/etc/init.d/ucentral-wifiscan
28+
$(INSTALL_BIN) ./files/ba-recovery.init $(1)/etc/init.d/ba-recovery
2729
$(INSTALL_DATA) ./files/event $(1)/etc/config/
2830
$(INSTALL_DATA) ./files/events.json $(1)/etc/
2931
endef
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
#!/bin/sh
2+
# ba-recovery: detect and recover broken BA sessions after cross-band roaming
3+
#
4+
# ROOT CAUSE:
5+
# QCN9274 firmware (WLAN.WBE.1.4.1) maintains internal BA/TX aggregation
6+
# state indexed by STA MAC across both 5G and 6G MACs. When the same STA
7+
# re-appears on a different MAC after a band-hop roam, the firmware does
8+
# NOT reset this state. The PHY rate stays high but actual throughput
9+
# collapses to 0-5 Mbps because the firmware's BA window tracking is stale.
10+
#
11+
# Kernel patch 994 (force-fresh TID on ampdu_start) handles ~50-70% of
12+
# cases by resetting the REO queue. This script is the userspace safety net
13+
# for the remaining cases where firmware still gets stuck.
14+
#
15+
# FLOW:
16+
# 1. Watches hostapd syslog for STA association messages
17+
# 2. Detects band-hops (STA moves between phy5g-apX and phy6g-apX)
18+
# 3. Ignores idle STAs: those whose RX delta (step 4) is 0 and that were never recorded as passing real traffic (>= PKT_THRESHOLD)
19+
# 4. After CHECK_DELAY seconds, samples RX packet count twice (1s apart)
20+
# 5. If delta < PKT_THRESHOLD -> disassociates STA to force clean reconnect
21+
# 6. Stops kicking a STA after MAX_KICKS recovery attempts in a row; the count restarts once more than 30s pass without an attempt, or after a healthy check
22+
#
23+
# Usage: ba-recovery [CHECK_DELAY_SEC] [PKT_THRESHOLD]
24+
# Defaults: CHECK_DELAY=2, PKT_THRESHOLD=2000
25+
#
26+
# Install as init.d service for persistent background operation.
27+
28+
# Tunables. The first two can be overridden positionally:
#   $1 -> CHECK_DELAY, $2 -> PKT_THRESHOLD
MAX_KICKS=5
PKT_THRESHOLD="${2:-2000}"
CHECK_DELAY="${1:-2}"

# Board gate: the workaround targets a QCN9274 firmware bug, so leave
# quietly (exit status 0) on anything that is not an EAP105.
BOARD=$(cat /tmp/sysinfo/board_name 2>/dev/null)
if [ "$BOARD" != "edgecore,eap105" ]; then
	logger -t ba-recovery "board '$BOARD' is not EAP105, exiting"
	exit 0
fi
41+
42+
# Report a message twice: to syslog under the "ba-recovery" tag, and to
# stdout prefixed with the current epoch time in square brackets.
# Arguments: the message words (joined the way "$*" joins them).
log() {
	local stamp

	logger -t ba-recovery "$*"
	stamp=$(date +%s)
	echo "[${stamp}] $*"
}
46+
47+
# Print the RX packet counter that `iwinfo <iface> assoc` lists for a STA.
#
# Arguments: $1 - STA MAC address (matched case-insensitively)
#            $2 - interface name handed to iwinfo
# Outputs:   the packet count on stdout; "0" when either argument is empty
#            or no "<N> Pkts." figure can be extracted from an "RX:" line
#            within the two lines following the STA's entry
get_rx_pkts() {
	local mac="$1"
	local iface="$2"
	local pkts

	# An empty pattern matches every line, which would report the first
	# listed STA's counter. Treat missing arguments as "nothing found".
	if [ -z "$mac" ] || [ -z "$iface" ]; then
		echo "0"
		return
	fi

	# -F: the MAC is a literal string, not a regex.
	# -i: do not depend on the letter case iwinfo prints.
	# -A2: keep the two lines after the match, where the RX line is sought.
	pkts=$(iwinfo "$iface" assoc 2>/dev/null |
		grep -i -F -A2 "$mac" |
		grep "RX:" |
		head -1 |
		sed -n 's/.*[[:space:]]\([0-9]*\) Pkts\..*/\1/p')

	[ -z "$pkts" ] && pkts="0"
	echo "$pkts"
}
63+
64+
# Disassociate a STA whose traffic check failed, with per-STA rate limiting.
#
# Globals:   STATE_DIR (read), MAX_KICKS (read), PKT_THRESHOLD (logged only)
# Arguments: $1 - STA MAC
#            $2 - interface the STA is associated on
#            $3 - measured packet delta (logged only)
# Side effects: rewrites $STATE_DIR/<mac>.kicks and <mac>.last_kick on every
#               call (also when backing off); runs
#               `hostapd_cli ... disassociate` unless the kick limit is hit.
do_recover() {
	local mac="$1"
	local iface="$2"
	local delta="$3"
	local kicks last_kick now elapsed

	# Load the kick counter and the time of the previous attempt.
	kicks=0
	[ -f "$STATE_DIR/${mac}.kicks" ] && kicks=$(cat "$STATE_DIR/${mac}.kicks")
	last_kick=0
	[ -f "$STATE_DIR/${mac}.last_kick" ] && last_kick=$(cat "$STATE_DIR/${mac}.last_kick")

	# The state files are plain text rewritten with `>`; a read that lands
	# on an empty or partial file must not reach $(( )), where it would be
	# a syntax error. Anything that is not a plain number counts as 0.
	case "$kicks" in
	'' | *[!0-9]*) kicks=0 ;;
	esac
	case "$last_kick" in
	'' | *[!0-9]*) last_kick=0 ;;
	esac

	# Reset counter if last kick was more than 30 seconds ago (not a rapid-fire loop)
	now=$(date +%s)
	elapsed=$((now - last_kick))
	if [ "$elapsed" -gt 30 ]; then
		kicks=0
	fi

	kicks=$((kicks + 1))
	echo "$kicks" > "$STATE_DIR/${mac}.kicks"
	echo "$now" > "$STATE_DIR/${mac}.last_kick"

	if [ "$kicks" -gt "$MAX_KICKS" ]; then
		log "SKIP: $mac rapid-fire kicks=$kicks > max=$MAX_KICKS within 30s, backing off"
		return
	fi

	log "RECOVER: $mac on $iface pkt_delta=${delta} < threshold=${PKT_THRESHOLD} (kick $kicks/$MAX_KICKS)"
	log "RECOVER: disassociating $mac from $iface"
	hostapd_cli -i "$iface" disassociate "$mac" reason=4 tx=0
}
96+
97+
# Sample a STA's RX packet counter twice, one second apart, and call
# do_recover() when the difference is below PKT_THRESHOLD.
#
# Globals:   STATE_DIR (read/written), PKT_THRESHOLD (read)
# Arguments: $1 - STA MAC
#            $2 - interface the STA associated on
# Side effects: on a healthy sample removes $STATE_DIR/<mac>.kicks and
#               writes the delta to $STATE_DIR/<mac>.active.
check_sta_pkts() {
	local mac="$1"
	local iface="$2"
	local pkts1 pkts2 delta

	# A "0" reading is treated as "no counter available for this STA".
	pkts1=$(get_rx_pkts "$mac" "$iface")
	if [ "$pkts1" = "0" ]; then
		log "check: $mac on $iface — STA not found in assoc table"
		return
	fi

	sleep 1

	pkts2=$(get_rx_pkts "$mac" "$iface")
	if [ "$pkts2" = "0" ]; then
		log "check: $mac on $iface — STA gone after 1s"
		return
	fi

	delta=$((pkts2 - pkts1))

	# A second sample lower than the first means the counter restarted in
	# between (presumably the STA re-associated). The difference then says
	# nothing about throughput, so it must not be used to justify a kick.
	if [ "$delta" -lt 0 ]; then
		log "SKIP: $mac on $iface — RX counter went backwards (${pkts1} -> ${pkts2}), ignoring sample"
		return
	fi

	# Skip truly idle STAs (no traffic at all) — they aren't broken, just inactive
	if [ "$delta" -eq 0 ] && [ ! -f "$STATE_DIR/${mac}.active" ]; then
		log "SKIP: $mac on $iface — idle STA (delta=0, never active), ignoring"
		return
	fi

	if [ "$delta" -lt "$PKT_THRESHOLD" ]; then
		do_recover "$mac" "$iface" "$delta"
	else
		log "OK: $mac on $iface pkt_delta=${delta}/s (healthy)"
		# Reset kick counter on success
		rm -f "$STATE_DIR/${mac}.kicks"
		# Mark as active for future reference
		echo "$delta" > "$STATE_DIR/${mac}.active"
	fi
}
134+
135+
# Record that a STA is passing real traffic.
#
# Takes two RX packet readings one second apart; when the difference is at
# least PKT_THRESHOLD it is written to $STATE_DIR/<mac>.active. A reading
# of "0" at either point ends the function without touching any state.
#
# Globals:   STATE_DIR (written), PKT_THRESHOLD (read)
# Arguments: $1 - STA MAC, $2 - interface name
update_active_state() {
	local mac="$1"
	local iface="$2"
	local before after rate

	before=$(get_rx_pkts "$mac" "$iface")
	if [ "$before" = "0" ]; then
		return
	fi

	sleep 1

	after=$(get_rx_pkts "$mac" "$iface")
	if [ "$after" = "0" ]; then
		return
	fi

	rate=$((after - before))
	if [ "$rate" -ge "$PKT_THRESHOLD" ]; then
		echo "$rate" > "$STATE_DIR/${mac}.active"
	fi
}
153+
154+
# --- Main ---
log "=== ba-recovery daemon starting (pid=$$) ==="
log "CONFIG: CHECK_DELAY=${CHECK_DELAY}s, PKT_THRESHOLD=${PKT_THRESHOLD}pkts/s, MAX_KICKS=${MAX_KICKS}"

# Interface discovery: poll the ubus object list for hostapd.* entries,
# at most 30 rounds with a 2 s pause between them, stopping early as soon
# as both a *5g* and a *6g* interface have been seen.
IFACE_5G=""
IFACE_6G=""
RETRIES=30
until [ "$RETRIES" -le 0 ]; do
	for obj in $(ubus list 2>/dev/null | grep "^hostapd\\."); do
		iface="${obj#hostapd.}"
		case "$iface" in
		*5g*) IFACE_5G="$iface" ;;
		*6g*) IFACE_6G="$iface" ;;
		esac
	done
	if [ -n "$IFACE_5G" ] && [ -n "$IFACE_6G" ]; then
		break
	fi
	RETRIES=$((RETRIES - 1))
	sleep 2
done

# Only a complete miss is fatal; one discovered band is enough to go on.
if [ -z "${IFACE_5G}${IFACE_6G}" ]; then
	log "ERROR: no hostapd interfaces found after 60s"
	exit 1
fi
if [ -n "$IFACE_5G" ]; then
	log " 5G interface: $IFACE_5G"
fi
if [ -n "$IFACE_6G" ]; then
	log " 6G interface: $IFACE_6G"
fi

# Per-STA state lives in a directory that is recreated empty on every start.
STATE_DIR="/tmp/ba-recovery-state"
rm -rf "$STATE_DIR"
mkdir -p "$STATE_DIR"
186+
187+
# Signal handler: drop the state directory, signal whatever `jobs -p`
# lists, then leave with exit status 0.
# Globals: STATE_DIR (read)
cleanup() {
	local pids

	rm -rf "$STATE_DIR"
	pids=$(jobs -p)
	# Unquoted on purpose: one PID per word.
	kill $pids 2>/dev/null
	exit 0
}
192+
# Route SIGINT/SIGTERM through cleanup().
trap cleanup INT TERM

log "=== ready, watching logread for hostapd events ==="

# Follow syslog and react to hostapd association lines of the form
#   "hostapd: phy5g-ap0: STA xx:xx:xx:xx:xx:xx IEEE 802.11: associated (aid N)"
logread -f | while IFS= read -r line; do
	# Anything that is not an association event is of no interest.
	case "$line" in
	*"IEEE 802.11: associated (aid"*) ;;
	*) continue ;;
	esac

	# STA MAC (lower-cased) and interface name, both lifted from the line.
	mac=$(echo "$line" | sed -n 's/.*STA \([0-9a-fA-F:]*\) IEEE.*/\1/p' | tr 'A-F' 'a-f')
	iface=$(echo "$line" | sed -n 's/.*hostapd: \([^ :]*\): STA.*/\1/p')
	if [ -z "$mac" ] || [ -z "$iface" ]; then
		continue
	fi

	# The band is derived from the interface name; events on interfaces
	# that name neither band are dropped.
	band=""
	case "$iface" in
	*5g*) band="5g" ;;
	*6g*) band="6g" ;;
	*) continue ;;
	esac

	# $STATE_DIR/<mac> holds the band of the STA's latest association;
	# read the old value before overwriting it with the new one.
	prev_band=""
	if [ -f "$STATE_DIR/$mac" ]; then
		prev_band=$(cat "$STATE_DIR/$mac")
	fi
	echo "$band" > "$STATE_DIR/$mac"

	# Both follow-ups run in background subshells so the reader loop
	# is never held up by their sleeps.
	if [ -z "$prev_band" ] || [ "$prev_band" = "$band" ]; then
		# Same band or first association — sample traffic to track active state
		(
			sleep 1
			update_active_state "$mac" "$iface"
		) &
	else
		log "BAND-HOP: $mac moved $prev_band -> $band ($iface)"
		(
			sleep "$CHECK_DELAY"
			check_sta_pkts "$mac" "$iface"
		) &
	fi
done
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh /etc/rc.common

# procd service definition for /usr/bin/ba-recovery.

START=99
USE_PROCD=1

# Register the ba-recovery instance with procd.
#
# The daemon is started with CHECK_DELAY=2 and PKT_THRESHOLD=2000, with
# stdout/stderr forwarded and respawn enabled (threshold 3600 s, 5 s delay,
# 5 retries).
start_service() {
	local board

	# /usr/bin/ba-recovery exits straight away on any board other than
	# the EAP105. With respawn configured, such an instance would be
	# restarted until the retry limit is reached, so do not register
	# one on other boards in the first place.
	board=$(cat /tmp/sysinfo/board_name 2>/dev/null)
	[ "$board" = "edgecore,eap105" ] || return 0

	procd_open_instance
	procd_set_param command /usr/bin/ba-recovery 2 2000
	procd_set_param respawn 3600 5 5
	procd_set_param stdout 1
	procd_set_param stderr 1
	procd_close_instance
}

0 commit comments

Comments
 (0)