Skip to content

Commit f6ac515

Browse files
lukaszraczylopelwell
authored andcommitted
net: macb: add TX stall watchdog as defence-in-depth safety net
Patches 1/3 and 2/3 address two candidate races that could lead to a TCOMP completion being missed on PCIe-attached macb instances. This patch adds a defence-in-depth safety net, in case a further race remains that we have not identified. The watchdog is a per-queue delayed_work that runs once per second. Movement is tracked via a tx_stall_tail_moved boolean: macb_tx_complete() sets it under tx_ptr_lock whenever tx_tail advances, and the watchdog clears it under the same lock at each tick. If the ring is non-empty (tx_head != tx_tail) and the boolean was still false at the next tick, the watchdog calls macb_tx_restart(). A boolean is used in preference to snapshotting tx_tail and comparing across ticks, because per-queue ring indices are bounded and reused; under sustained load a snapshot comparison can false-positive when the index happens to land on the same value between two ticks. Both writes share tx_ptr_lock with the existing tx_head / tx_tail updates, so no atomic is required. No new recovery logic is introduced. macb_tx_restart() already exists in this file, is correctly locked (tx_ptr_lock, bp->lock), and verifies that the hardware's TBQP is behind the driver's head index before re-asserting TSTART. On a healthy ring it is a no-op at the hardware level; the watchdog only supplies the missing trigger. On a healthy queue the per-tick cost is one spin_lock_irqsave() / spin_unlock_irqrestore(), one branch, and one byte store. The delayed_work is only scheduled between macb_open() and macb_close(), and is cancelled synchronously on close. Context for submission: on our 24-node Raspberry Pi 5 fleet, before this series, an out-of-band user-space watchdog (monitoring tx_packets from /sys/class/net/.../statistics and toggling the link down/up when it froze) was required to keep nodes usable. We include this kernel-side watchdog as a cleaner in-kernel equivalent for any residual stall that patches 1 and 2 do not cover. We are willing to drop this patch if the view is that 1 and 2 should stand alone. Link: cilium/cilium#43198 Link: https://bugs.launchpad.net/ubuntu/+source/linux-raspi/+bug/2133877 Signed-off-by: Lukasz Raczylo <lukasz@raczylo.com>
1 parent 6292b36 commit f6ac515

2 files changed

Lines changed: 76 additions & 0 deletions

File tree

drivers/net/ethernet/cadence/macb.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,6 +1289,17 @@ struct macb_queue {
12891289
struct work_struct tx_error_task;
12901290
bool txubr_pending;
12911291
bool tx_pending;
1292+
1293+
/* TX stall watchdog -- see macb_tx_stall_watchdog() in macb_main.c.
1294+
* tx_stall_tail_moved is set by macb_tx_complete() under tx_ptr_lock
1295+
* whenever tx_tail advances, and cleared by the watchdog tick on the
1296+
* same lock. A bool avoids the index-aliasing false-positive that a
1297+
* snapshot-of-tx_tail comparison would have when the ring index space
1298+
* happens to wrap to the same value between two ticks.
1299+
*/
1300+
struct delayed_work tx_stall_watchdog_work;
1301+
bool tx_stall_tail_moved;
1302+
12921303
struct napi_struct napi_tx;
12931304

12941305
dma_addr_t rx_ring_dma;

drivers/net/ethernet/cadence/macb_main.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,6 +1464,8 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
14641464
packets, bytes);
14651465

14661466
queue->tx_tail = tail;
1467+
if (packets)
1468+
queue->tx_stall_tail_moved = true;
14671469
if (__netif_subqueue_stopped(bp->dev, queue_index) &&
14681470
CIRC_CNT(queue->tx_head, queue->tx_tail,
14691471
bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
@@ -1998,6 +2000,63 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
19982000
return work_done;
19992001
}
20002002

2003+
#define MACB_TX_STALL_INTERVAL_MS 1000
2004+
2005+
/* TX stall watchdog.
2006+
*
2007+
* Defence-in-depth against lost TCOMP interrupts. macb already has a
2008+
* recovery chain (tx_pending -> txubr_pending -> macb_tx_restart())
2009+
* that fires on TCOMP; if TCOMP itself is lost the TX ring stalls
2010+
* silently until something else kicks TSTART. This watchdog runs
2011+
* once per second per queue and calls macb_tx_restart() if the ring
2012+
* is non-empty and tx_tail has not advanced since the previous tick.
2013+
*
2014+
* Movement is tracked via the tx_stall_tail_moved boolean rather
2015+
* than by snapshotting tx_tail. Per-queue ring indices are bounded
2016+
* (and reused), so a snapshot comparison can false-positive when the
2017+
* index happens to land on the same value between two ticks under
2018+
* sustained load. The boolean is set by macb_tx_complete() whenever
2019+
* tx_tail advances and cleared by this watchdog after each tick;
2020+
* both writes are under tx_ptr_lock, so no atomic is required.
2021+
*
2022+
* macb_tx_restart() already checks the hardware's TBQP against the
2023+
* driver's head index before re-asserting TSTART, so on a healthy
2024+
* ring this is a no-op at the hardware level. The watchdog only
2025+
* adds the missing trigger.
2026+
*/
2027+
static void macb_tx_stall_watchdog(struct work_struct *work)
2028+
{
2029+
struct macb_queue *queue = container_of(to_delayed_work(work),
2030+
struct macb_queue,
2031+
tx_stall_watchdog_work);
2032+
struct macb *bp = queue->bp;
2033+
unsigned int cur_tail, cur_head;
2034+
bool stalled = false;
2035+
unsigned long flags;
2036+
2037+
if (!netif_running(bp->dev))
2038+
return;
2039+
2040+
spin_lock_irqsave(&queue->tx_ptr_lock, flags);
2041+
cur_tail = queue->tx_tail;
2042+
cur_head = queue->tx_head;
2043+
if (cur_head != cur_tail && !queue->tx_stall_tail_moved)
2044+
stalled = true;
2045+
queue->tx_stall_tail_moved = false;
2046+
spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
2047+
2048+
if (stalled) {
2049+
netdev_warn_once(bp->dev,
2050+
"TX stall detected on queue %u (tail=%u head=%u); re-kicking TSTART\n",
2051+
(unsigned int)(queue - bp->queues),
2052+
cur_tail, cur_head);
2053+
macb_tx_restart(queue);
2054+
}
2055+
2056+
schedule_delayed_work(&queue->tx_stall_watchdog_work,
2057+
msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
2058+
}
2059+
20012060
static void macb_hresp_error_task(struct work_struct *work)
20022061
{
20032062
struct macb *bp = from_work(bp, work, hresp_err_bh_work);
@@ -3256,6 +3315,9 @@ static int macb_open(struct net_device *dev)
32563315
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
32573316
napi_enable(&queue->napi_rx);
32583317
napi_enable(&queue->napi_tx);
3318+
queue->tx_stall_tail_moved = true;
3319+
schedule_delayed_work(&queue->tx_stall_watchdog_work,
3320+
msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
32593321
}
32603322

32613323
macb_init_hw(bp);
@@ -3306,6 +3368,7 @@ static int macb_close(struct net_device *dev)
33063368
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
33073369
napi_disable(&queue->napi_rx);
33083370
napi_disable(&queue->napi_tx);
3371+
cancel_delayed_work_sync(&queue->tx_stall_watchdog_work);
33093372
netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
33103373
}
33113374

@@ -4912,6 +4975,8 @@ static int macb_init(struct platform_device *pdev)
49124975
}
49134976

49144977
INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
4978+
INIT_DELAYED_WORK(&queue->tx_stall_watchdog_work,
4979+
macb_tx_stall_watchdog);
49154980
q++;
49164981
}
49174982

0 commit comments

Comments
 (0)