Skip to content

Commit 39b3d9f

Browse files
committed
net/mana: replace event-driven reset with timer-based reset
Consolidate reset handling into IBV_EVENT_DEVICE_FATAL with a timer-based reset exit, replacing the separate IBV_EVENT_PORT_ERR and IBV_EVENT_PORT_ACTIVE event handling.
- Merge PORT_ERR handling into DEVICE_FATAL case and remove IBV_EVENT_PORT_ERR and IBV_EVENT_PORT_ACTIVE switch cases.
- After mana_reset_enter completes teardown, schedule a 15-second timer (MANA_RESET_TIMER_US) that triggers mana_reset_exit to re-probe and restart the device.
- Release reset_ops_lock after scheduling the timer so the application can call dev_stop/dev_close during the timer window.
- dev_stop and dev_close use blocking rte_spinlock_lock and cancel the pending timer when called during the reset window, allowing the application to perform a clean shutdown instead of reset.
- All other ops check dev_state != MANA_DEV_ACTIVE and return -EBUSY (ops returning void log an error and return) so they remain blocked during the entire reset window.
- mana_reset_timer_cb reschedules after 1 second if the lock is held, instead of silently skipping.
- mana_reset_exit_delay returns early if state is no longer RESET_EXIT (app already handled cleanup).
- Make mana_stop_tx_queues and mana_stop_rx_queues idempotent by returning 0 when queues are already stopped.
- Add NULL guard in mana_intr_uninstall to prevent double-free.
- Cancel the reset timer in mana_dev_close for safe device removal.
Signed-off-by: Long Li <longli@microsoft.com>
1 parent ab6efdd commit 39b3d9f

1 file changed

Lines changed: 154 additions & 71 deletions

File tree

drivers/net/mana/mana.c

Lines changed: 154 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -297,13 +297,17 @@ mana_dev_stop(struct rte_eth_dev *dev)
297297
}
298298

299299
static int mana_intr_uninstall(struct mana_priv *priv);
300+
static void mana_reset_timer_cb(void *arg);
300301

301302
static int
302303
mana_dev_close(struct rte_eth_dev *dev)
303304
{
304305
struct mana_priv *priv = dev->data->dev_private;
305306
int ret;
306307

308+
/* Cancel pending reset timer to prevent firing during teardown */
309+
rte_eal_alarm_cancel(mana_reset_timer_cb, priv);
310+
307311
DRV_LOG(DEBUG, "Free MR for priv %p", priv);
308312
mana_remove_all_mr(priv);
309313

@@ -337,10 +341,13 @@ mana_dev_close(struct rte_eth_dev *dev)
337341
&priv->dev_state, rte_memory_order_acquire);
338342
if (state == MANA_DEV_ACTIVE ||
339343
state == MANA_DEV_RESET_FAILED) {
340-
ret = ibv_close_device(priv->ib_ctx);
341-
if (ret) {
342-
ret = errno;
343-
return ret;
344+
if (priv->ib_ctx) {
345+
ret = ibv_close_device(priv->ib_ctx);
346+
if (ret) {
347+
ret = errno;
348+
return ret;
349+
}
350+
priv->ib_ctx = NULL;
344351
}
345352
}
346353

@@ -440,6 +447,11 @@ mana_dev_info_get_lock(struct rte_eth_dev *dev,
440447
struct mana_priv *priv = dev->data->dev_private;
441448
int ret;
442449
if (rte_spinlock_trylock(&priv->reset_ops_lock)) {
450+
if (rte_atomic_load_explicit(&priv->dev_state,
451+
rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
452+
rte_spinlock_unlock(&priv->reset_ops_lock);
453+
return -EBUSY;
454+
}
443455
ret = mana_dev_info_get(dev, dev_info);
444456
rte_spinlock_unlock(&priv->reset_ops_lock);
445457
} else {
@@ -617,6 +629,11 @@ mana_dev_tx_queue_setup_lock(struct rte_eth_dev *dev, uint16_t queue_idx,
617629
struct mana_priv *priv = dev->data->dev_private;
618630
int ret;
619631
if (rte_spinlock_trylock(&priv->reset_ops_lock)) {
632+
if (rte_atomic_load_explicit(&priv->dev_state,
633+
rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
634+
rte_spinlock_unlock(&priv->reset_ops_lock);
635+
return -EBUSY;
636+
}
620637
ret = mana_dev_tx_queue_setup(dev, queue_idx,
621638
nb_desc, socket_id, tx_conf);
622639
rte_spinlock_unlock(&priv->reset_ops_lock);
@@ -712,6 +729,11 @@ mana_dev_rx_queue_setup_lock(struct rte_eth_dev *dev, uint16_t queue_idx,
712729
struct mana_priv *priv = dev->data->dev_private;
713730
int ret;
714731
if (rte_spinlock_trylock(&priv->reset_ops_lock)) {
732+
if (rte_atomic_load_explicit(&priv->dev_state,
733+
rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
734+
rte_spinlock_unlock(&priv->reset_ops_lock);
735+
return -EBUSY;
736+
}
715737
ret = mana_dev_rx_queue_setup(dev, queue_idx, nb_desc,
716738
socket_id, rx_conf, mp);
717739
rte_spinlock_unlock(&priv->reset_ops_lock);
@@ -919,6 +941,12 @@ _func##_lock(struct rte_eth_dev *dev) \
919941
struct mana_priv *priv = dev->data->dev_private; \
920942
int ret; \
921943
if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
944+
if (rte_atomic_load_explicit(&priv->dev_state, \
945+
rte_memory_order_acquire) != \
946+
MANA_DEV_ACTIVE) { \
947+
rte_spinlock_unlock(&priv->reset_ops_lock); \
948+
return -EBUSY; \
949+
} \
922950
ret = _func(dev); \
923951
rte_spinlock_unlock(&priv->reset_ops_lock); \
924952
} else { \
@@ -931,11 +959,60 @@ MANA_OPS_1_LOCK(mana_dev_configure)
931959

932960
MANA_OPS_1_LOCK(mana_dev_start)
933961

934-
MANA_OPS_1_LOCK(mana_dev_stop)
935-
936-
MANA_OPS_1_LOCK(mana_dev_close)
937962
#undef MANA_OPS_1_LOCK
938963

964+
/*
965+
* Custom lock wrappers for dev_stop and dev_close.
966+
* These use a blocking lock (not trylock) so they wait for any
967+
* in-progress mana_reset_enter or mana_reset_exit_delay to finish,
968+
* rather than returning -EBUSY. When the device is not in
969+
* MANA_DEV_ACTIVE state, they cancel the pending reset timer,
970+
* transition state to MANA_DEV_ACTIVE. dev_stop then returns success
971+
* without calling mana_dev_stop (which was already called by
972+
* mana_reset_enter); dev_close still proceeds to mana_dev_close.
973+
*/
974+
static int
975+
mana_dev_stop_lock(struct rte_eth_dev *dev)
976+
{
977+
struct mana_priv *priv = dev->data->dev_private;
978+
int ret;
979+
980+
rte_spinlock_lock(&priv->reset_ops_lock);
981+
982+
if (rte_atomic_load_explicit(&priv->dev_state,
983+
rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
984+
rte_eal_alarm_cancel(mana_reset_timer_cb, priv);
985+
rte_atomic_store_explicit(&priv->dev_state,
986+
MANA_DEV_ACTIVE, rte_memory_order_release);
987+
rte_spinlock_unlock(&priv->reset_ops_lock);
988+
return 0;
989+
}
990+
991+
ret = mana_dev_stop(dev);
992+
rte_spinlock_unlock(&priv->reset_ops_lock);
993+
return ret;
994+
}
995+
996+
static int
997+
mana_dev_close_lock(struct rte_eth_dev *dev)
998+
{
999+
struct mana_priv *priv = dev->data->dev_private;
1000+
int ret;
1001+
1002+
rte_spinlock_lock(&priv->reset_ops_lock);
1003+
1004+
if (rte_atomic_load_explicit(&priv->dev_state,
1005+
rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
1006+
rte_eal_alarm_cancel(mana_reset_timer_cb, priv);
1007+
rte_atomic_store_explicit(&priv->dev_state,
1008+
MANA_DEV_ACTIVE, rte_memory_order_release);
1009+
}
1010+
1011+
ret = mana_dev_close(dev);
1012+
rte_spinlock_unlock(&priv->reset_ops_lock);
1013+
return ret;
1014+
}
1015+
9391016
#define MANA_OPS_2_LOCK(_func) \
9401017
static int \
9411018
_func##_lock(struct rte_eth_dev *dev, \
@@ -944,6 +1021,12 @@ _func##_lock(struct rte_eth_dev *dev, \
9441021
struct mana_priv *priv = dev->data->dev_private; \
9451022
int ret; \
9461023
if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
1024+
if (rte_atomic_load_explicit(&priv->dev_state, \
1025+
rte_memory_order_acquire) != \
1026+
MANA_DEV_ACTIVE) { \
1027+
rte_spinlock_unlock(&priv->reset_ops_lock); \
1028+
return -EBUSY; \
1029+
} \
9471030
ret = _func(dev, rss_conf); \
9481031
rte_spinlock_unlock(&priv->reset_ops_lock); \
9491032
} else { \
@@ -963,6 +1046,14 @@ _func##_lock(struct rte_eth_dev *dev, uint16_t _arg) \
9631046
{ \
9641047
struct mana_priv *priv = dev->data->dev_private; \
9651048
if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
1049+
if (rte_atomic_load_explicit(&priv->dev_state, \
1050+
rte_memory_order_acquire) != \
1051+
MANA_DEV_ACTIVE) { \
1052+
rte_spinlock_unlock(&priv->reset_ops_lock); \
1053+
DRV_LOG(ERR, "Device reset in progress, " \
1054+
"%s not called", #_func); \
1055+
return; \
1056+
} \
9661057
_func(dev, _arg); \
9671058
rte_spinlock_unlock(&priv->reset_ops_lock); \
9681059
} else { \
@@ -983,6 +1074,12 @@ _func##_lock(struct rte_eth_dev *dev, uint16_t _arg) \
9831074
struct mana_priv *priv = dev->data->dev_private; \
9841075
int ret; \
9851076
if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
1077+
if (rte_atomic_load_explicit(&priv->dev_state, \
1078+
rte_memory_order_acquire) != \
1079+
MANA_DEV_ACTIVE) { \
1080+
rte_spinlock_unlock(&priv->reset_ops_lock); \
1081+
return -EBUSY; \
1082+
} \
9861083
ret = _func(dev, _arg); \
9871084
rte_spinlock_unlock(&priv->reset_ops_lock); \
9881085
} else { \
@@ -1212,32 +1309,43 @@ mana_ibv_device_to_pci_addr(const struct ibv_device *device,
12121309
static int mana_pci_probe(struct rte_pci_driver *pci_drv,
12131310
struct rte_pci_device *pci_dev);
12141311
static void mana_intr_handler(void *arg);
1312+
static void mana_reset_exit(struct mana_priv *priv);
12151313

1216-
/* Timeout for waiting for PORT_ACTIVE after PORT_ERR */
1217-
#define MANA_RESET_TIMEOUT_US (90 * 1000000ULL) /* 90 seconds */
1314+
/* Delay before initiating reset exit after reset enter completes */
1315+
#define MANA_RESET_TIMER_US (15 * 1000000ULL) /* 15 seconds */
12181316

12191317
static void
1220-
mana_reset_timeout(void *arg)
1318+
mana_reset_timer_cb(void *arg)
12211319
{
12221320
struct mana_priv *priv = (struct mana_priv *)arg;
1223-
enum mana_device_state expected = MANA_DEV_RESET_EXIT;
12241321

1225-
/* Use CAS to claim ownership — only one of timeout or
1226-
* mana_reset_exit_delay can transition out of RESET_EXIT.
1322+
/* Try to acquire the lock. If the application is already
1323+
* holding it (e.g. doing dev_stop/dev_close), reschedule
1324+
* the timer to retry later.
12271325
*/
1228-
if (!rte_atomic_compare_exchange_strong_explicit(
1229-
&priv->dev_state, &expected,
1230-
MANA_DEV_RESET_FAILED,
1231-
rte_memory_order_acq_rel,
1232-
rte_memory_order_acquire)) {
1233-
DRV_LOG(DEBUG, "Reset timeout fired but state is %d",
1234-
expected);
1326+
if (!rte_spinlock_trylock(&priv->reset_ops_lock)) {
1327+
DRV_LOG(DEBUG, "Reset timer: lock held, rescheduling");
1328+
if (rte_atomic_load_explicit(&priv->dev_state,
1329+
rte_memory_order_acquire) == MANA_DEV_RESET_EXIT) {
1330+
if (rte_eal_alarm_set(1000000,
1331+
mana_reset_timer_cb, priv))
1332+
DRV_LOG(ERR, "Failed to reschedule reset timer");
1333+
}
12351334
return;
12361335
}
12371336

1238-
DRV_LOG(ERR, "Reset timeout: PORT_ACTIVE not received within %us",
1239-
(unsigned int)(MANA_RESET_TIMEOUT_US / 1000000));
1240-
rte_spinlock_unlock(&priv->reset_ops_lock);
1337+
if (rte_atomic_load_explicit(&priv->dev_state,
1338+
rte_memory_order_acquire) != MANA_DEV_RESET_EXIT) {
1339+
DRV_LOG(DEBUG, "Reset timer fired but not in RESET_EXIT state");
1340+
rte_spinlock_unlock(&priv->reset_ops_lock);
1341+
return;
1342+
}
1343+
1344+
DRV_LOG(INFO, "Reset timer fired, initiating reset exit");
1345+
mana_reset_exit(priv);
1346+
/* Lock is released by mana_reset_exit_delay at the end of
1347+
* the reset exit processing.
1348+
*/
12411349
}
12421350

12431351
static void
@@ -1303,20 +1411,22 @@ mana_reset_enter(struct mana_priv *priv)
13031411
rte_atomic_store_explicit(&priv->dev_state, MANA_DEV_RESET_EXIT,
13041412
rte_memory_order_release);
13051413

1306-
ret = rte_eal_alarm_set(MANA_RESET_TIMEOUT_US,
1307-
mana_reset_timeout, priv);
1414+
ret = rte_eal_alarm_set(MANA_RESET_TIMER_US,
1415+
mana_reset_timer_cb, priv);
13081416
if (ret) {
1309-
DRV_LOG(ERR, "Failed to set reset timeout alarm ret %d", ret);
1310-
DRV_LOG(ERR, "No timeout protection, transitioning to RESET_FAILED");
1417+
DRV_LOG(ERR, "Failed to set reset timer ret %d", ret);
1418+
DRV_LOG(ERR, "Cannot schedule reset exit, transitioning to RESET_FAILED");
13111419
rte_atomic_store_explicit(&priv->dev_state,
13121420
MANA_DEV_RESET_FAILED,
13131421
rte_memory_order_release);
13141422
rte_spinlock_unlock(&priv->reset_ops_lock);
13151423
return;
13161424
}
13171425

1318-
DRV_LOG(DEBUG, "Waiting for reset complete event");
1426+
DRV_LOG(DEBUG, "Reset exit timer scheduled");
13191427

1428+
/* Release the lock so the application can call dev_stop/dev_close */
1429+
rte_spinlock_unlock(&priv->reset_ops_lock);
13201430
return;
13211431

13221432
reset_failed:
@@ -1335,32 +1445,19 @@ mana_reset_exit_delay(void *arg)
13351445

13361446
DRV_LOG(DEBUG, "Delayed mana device reset complete processing");
13371447

1338-
/*
1339-
* Use CAS to verify state is still RESET_EXIT. The alarm is
1340-
* guaranteed cancelled by mana_reset_exit before this thread
1341-
* is created, so timeout cannot race here. The CAS is purely
1342-
* defensive — symmetric with mana_reset_timeout's CAS.
1448+
/* If the app called dev_stop/dev_close during the timer window,
1449+
* state is no longer RESET_EXIT. Nothing to do.
13431450
*/
1344-
enum mana_device_state expected = MANA_DEV_RESET_EXIT;
1345-
if (!rte_atomic_compare_exchange_strong_explicit(
1346-
&priv->dev_state, &expected,
1347-
MANA_DEV_RESET_EXIT,
1348-
rte_memory_order_acq_rel,
1349-
rte_memory_order_acquire)) {
1350-
DRV_LOG(ERR, "Wrong device state %d, exiting", expected);
1351-
/*
1352-
* Timeout or other path already took ownership.
1353-
* Do NOT unlock — the other path already did.
1354-
*/
1451+
if (rte_atomic_load_explicit(&priv->dev_state,
1452+
rte_memory_order_acquire) != MANA_DEV_RESET_EXIT) {
1453+
DRV_LOG(DEBUG, "State is not RESET_EXIT, skipping");
1454+
rte_spinlock_unlock(&priv->reset_ops_lock);
13551455
return ret;
13561456
}
13571457

13581458
dev = &rte_eth_devices[priv->port_id];
13591459
pci_dev = RTE_ETH_DEV_TO_PCI(dev);
13601460

1361-
/* Cancel the timeout alarm to prevent race during reset-exit */
1362-
rte_eal_alarm_cancel(mana_reset_timeout, priv);
1363-
13641461
DRV_LOG(DEBUG, "Resetting dev = %p, priv = %p", dev, priv);
13651462

13661463
ret = ibv_close_device(priv->ib_ctx);
@@ -1495,8 +1592,8 @@ mana_reset_exit(struct mana_priv *priv)
14951592
}
14961593
DRV_LOG(DEBUG, "Entering into device reset complete processing");
14971594

1498-
/* Cancel the reset timeout alarm — PORT_ACTIVE arrived in time */
1499-
rte_eal_alarm_cancel(mana_reset_timeout, priv);
1595+
/* Cancel the reset timer (harmless no-op if already fired) */
1596+
rte_eal_alarm_cancel(mana_reset_timer_cb, priv);
15001597

15011598
rxq_intr_disable(priv);
15021599

@@ -1544,13 +1641,6 @@ mana_intr_handler(void *arg)
15441641

15451642
switch (event.event_type) {
15461643
case IBV_EVENT_DEVICE_FATAL:
1547-
dev = &rte_eth_devices[priv->port_id];
1548-
if (dev->data->dev_conf.intr_conf.rmv)
1549-
rte_eth_dev_callback_process(dev,
1550-
RTE_ETH_EVENT_INTR_RMV, NULL);
1551-
break;
1552-
1553-
case IBV_EVENT_PORT_ERR:
15541644
DRV_LOG(INFO, "Device reset event received");
15551645
if (rte_atomic_load_explicit(&priv->dev_state,
15561646
rte_memory_order_acquire) == MANA_DEV_ACTIVE) {
@@ -1559,22 +1649,11 @@ mana_intr_handler(void *arg)
15591649
} else {
15601650
DRV_LOG(ERR, "Already in reset handling");
15611651
}
1562-
break;
15631652

1564-
case IBV_EVENT_PORT_ACTIVE:
1565-
DRV_LOG(INFO, "Device reset Complete event received");
1566-
if (rte_atomic_load_explicit(&priv->dev_state,
1567-
rte_memory_order_acquire) == MANA_DEV_RESET_EXIT) {
1568-
mana_reset_exit(priv);
1569-
} else {
1570-
if (rte_atomic_load_explicit(&priv->dev_state,
1571-
rte_memory_order_acquire) == MANA_DEV_ACTIVE)
1572-
DRV_LOG(ERR, "Not in "
1573-
"MANA_DEV_RESET_EXIT state");
1574-
else
1575-
DRV_LOG(ERR, "Still in "
1576-
"MANA_DEV_RESET_ENTER state");
1577-
}
1653+
dev = &rte_eth_devices[priv->port_id];
1654+
if (dev->data->dev_conf.intr_conf.rmv)
1655+
rte_eth_dev_callback_process(dev,
1656+
RTE_ETH_EVENT_INTR_RMV, NULL);
15781657
break;
15791658

15801659
default:
@@ -1590,6 +1669,9 @@ mana_intr_uninstall(struct mana_priv *priv)
15901669
{
15911670
int ret;
15921671

1672+
if (!priv->intr_handle)
1673+
return 0;
1674+
15931675
ret = rte_intr_callback_unregister(priv->intr_handle,
15941676
mana_intr_handler, priv);
15951677
if (ret <= 0) {
@@ -1598,6 +1680,7 @@ mana_intr_uninstall(struct mana_priv *priv)
15981680
}
15991681

16001682
rte_intr_instance_free(priv->intr_handle);
1683+
priv->intr_handle = NULL;
16011684

16021685
return 0;
16031686
}

0 commit comments

Comments
 (0)