Skip to content

Commit 07f3cb1

Browse files
committed
net/mana: replace event-driven reset with timer-based reset
Consolidate reset handling into IBV_EVENT_DEVICE_FATAL with a timer-based reset exit, replacing the separate IBV_EVENT_PORT_ERR and IBV_EVENT_PORT_ACTIVE event handling.

- Merge PORT_ERR handling into DEVICE_FATAL case and remove IBV_EVENT_PORT_ERR and IBV_EVENT_PORT_ACTIVE switch cases.
- After mana_reset_enter completes teardown, schedule a 10-second timer that triggers mana_reset_exit to re-probe and restart the device.
- Release reset_ops_lock after scheduling the timer so the application can call dev_stop/dev_close during the timer window.
- dev_stop and dev_close use blocking rte_spinlock_lock and cancel the pending timer when called during the reset window, allowing the application to perform a clean shutdown instead of reset.
- All other ops check dev_state != MANA_DEV_ACTIVE and return -EBUSY so they remain blocked during the entire reset window.
- mana_reset_timer_cb reschedules after 1 second if the lock is held, instead of silently skipping.
- mana_reset_exit_delay returns early if state is no longer RESET_EXIT (app already handled cleanup).
- Make mana_stop_tx_queues and mana_stop_rx_queues idempotent by returning 0 when queues are already stopped.
- Add NULL guard in mana_intr_uninstall to prevent double-free.
- Cancel the reset timer in mana_dev_close for safe device removal.

Signed-off-by: Long Li <longli@microsoft.com>
1 parent ab6efdd commit 07f3cb1

3 files changed

Lines changed: 142 additions & 65 deletions

File tree

drivers/net/mana/mana.c

Lines changed: 140 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -297,13 +297,17 @@ mana_dev_stop(struct rte_eth_dev *dev)
297297
}
298298

299299
static int mana_intr_uninstall(struct mana_priv *priv);
300+
static void mana_reset_timer_cb(void *arg);
300301

301302
static int
302303
mana_dev_close(struct rte_eth_dev *dev)
303304
{
304305
struct mana_priv *priv = dev->data->dev_private;
305306
int ret;
306307

308+
/* Cancel pending reset timer to prevent firing during teardown */
309+
rte_eal_alarm_cancel(mana_reset_timer_cb, priv);
310+
307311
DRV_LOG(DEBUG, "Free MR for priv %p", priv);
308312
mana_remove_all_mr(priv);
309313

@@ -440,6 +444,11 @@ mana_dev_info_get_lock(struct rte_eth_dev *dev,
440444
struct mana_priv *priv = dev->data->dev_private;
441445
int ret;
442446
if (rte_spinlock_trylock(&priv->reset_ops_lock)) {
447+
if (rte_atomic_load_explicit(&priv->dev_state,
448+
rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
449+
rte_spinlock_unlock(&priv->reset_ops_lock);
450+
return -EBUSY;
451+
}
443452
ret = mana_dev_info_get(dev, dev_info);
444453
rte_spinlock_unlock(&priv->reset_ops_lock);
445454
} else {
@@ -617,6 +626,11 @@ mana_dev_tx_queue_setup_lock(struct rte_eth_dev *dev, uint16_t queue_idx,
617626
struct mana_priv *priv = dev->data->dev_private;
618627
int ret;
619628
if (rte_spinlock_trylock(&priv->reset_ops_lock)) {
629+
if (rte_atomic_load_explicit(&priv->dev_state,
630+
rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
631+
rte_spinlock_unlock(&priv->reset_ops_lock);
632+
return -EBUSY;
633+
}
620634
ret = mana_dev_tx_queue_setup(dev, queue_idx,
621635
nb_desc, socket_id, tx_conf);
622636
rte_spinlock_unlock(&priv->reset_ops_lock);
@@ -712,6 +726,11 @@ mana_dev_rx_queue_setup_lock(struct rte_eth_dev *dev, uint16_t queue_idx,
712726
struct mana_priv *priv = dev->data->dev_private;
713727
int ret;
714728
if (rte_spinlock_trylock(&priv->reset_ops_lock)) {
729+
if (rte_atomic_load_explicit(&priv->dev_state,
730+
rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
731+
rte_spinlock_unlock(&priv->reset_ops_lock);
732+
return -EBUSY;
733+
}
715734
ret = mana_dev_rx_queue_setup(dev, queue_idx, nb_desc,
716735
socket_id, rx_conf, mp);
717736
rte_spinlock_unlock(&priv->reset_ops_lock);
@@ -919,6 +938,12 @@ _func##_lock(struct rte_eth_dev *dev) \
919938
struct mana_priv *priv = dev->data->dev_private; \
920939
int ret; \
921940
if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
941+
if (rte_atomic_load_explicit(&priv->dev_state, \
942+
rte_memory_order_acquire) != \
943+
MANA_DEV_ACTIVE) { \
944+
rte_spinlock_unlock(&priv->reset_ops_lock); \
945+
return -EBUSY; \
946+
} \
922947
ret = _func(dev); \
923948
rte_spinlock_unlock(&priv->reset_ops_lock); \
924949
} else { \
@@ -931,11 +956,58 @@ MANA_OPS_1_LOCK(mana_dev_configure)
931956

932957
MANA_OPS_1_LOCK(mana_dev_start)
933958

934-
MANA_OPS_1_LOCK(mana_dev_stop)
935-
936-
MANA_OPS_1_LOCK(mana_dev_close)
937959
#undef MANA_OPS_1_LOCK
938960

961+
/*
962+
* Custom lock wrappers for dev_stop and dev_close.
963+
* These use a blocking lock (not trylock) so they wait for any
964+
* in-progress mana_reset_enter or mana_reset_exit_delay to finish,
965+
* rather than returning -EBUSY. When the device is in
966+
* MANA_DEV_RESET_EXIT state (between mana_reset_enter returning
967+
* and mana_reset_timer_cb firing), they cancel the pending timer
968+
* and transition to MANA_DEV_RESET_FAILED so that the underlying
969+
* functions perform proper cleanup (interrupt uninstall, IB close, etc.).
970+
*/
971+
static int
972+
mana_dev_stop_lock(struct rte_eth_dev *dev)
973+
{
974+
struct mana_priv *priv = dev->data->dev_private;
975+
int ret;
976+
977+
rte_spinlock_lock(&priv->reset_ops_lock);
978+
979+
if (rte_atomic_load_explicit(&priv->dev_state,
980+
rte_memory_order_acquire) == MANA_DEV_RESET_EXIT) {
981+
rte_eal_alarm_cancel(mana_reset_timer_cb, priv);
982+
rte_atomic_store_explicit(&priv->dev_state,
983+
MANA_DEV_ACTIVE, rte_memory_order_release);
984+
}
985+
986+
ret = mana_dev_stop(dev);
987+
rte_spinlock_unlock(&priv->reset_ops_lock);
988+
return ret;
989+
}
990+
991+
static int
992+
mana_dev_close_lock(struct rte_eth_dev *dev)
993+
{
994+
struct mana_priv *priv = dev->data->dev_private;
995+
int ret;
996+
997+
rte_spinlock_lock(&priv->reset_ops_lock);
998+
999+
if (rte_atomic_load_explicit(&priv->dev_state,
1000+
rte_memory_order_acquire) == MANA_DEV_RESET_EXIT) {
1001+
rte_eal_alarm_cancel(mana_reset_timer_cb, priv);
1002+
rte_atomic_store_explicit(&priv->dev_state,
1003+
MANA_DEV_ACTIVE, rte_memory_order_release);
1004+
}
1005+
1006+
ret = mana_dev_close(dev);
1007+
rte_spinlock_unlock(&priv->reset_ops_lock);
1008+
return ret;
1009+
}
1010+
9391011
#define MANA_OPS_2_LOCK(_func) \
9401012
static int \
9411013
_func##_lock(struct rte_eth_dev *dev, \
@@ -944,6 +1016,12 @@ _func##_lock(struct rte_eth_dev *dev, \
9441016
struct mana_priv *priv = dev->data->dev_private; \
9451017
int ret; \
9461018
if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
1019+
if (rte_atomic_load_explicit(&priv->dev_state, \
1020+
rte_memory_order_acquire) != \
1021+
MANA_DEV_ACTIVE) { \
1022+
rte_spinlock_unlock(&priv->reset_ops_lock); \
1023+
return -EBUSY; \
1024+
} \
9471025
ret = _func(dev, rss_conf); \
9481026
rte_spinlock_unlock(&priv->reset_ops_lock); \
9491027
} else { \
@@ -963,6 +1041,14 @@ _func##_lock(struct rte_eth_dev *dev, uint16_t _arg) \
9631041
{ \
9641042
struct mana_priv *priv = dev->data->dev_private; \
9651043
if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
1044+
if (rte_atomic_load_explicit(&priv->dev_state, \
1045+
rte_memory_order_acquire) != \
1046+
MANA_DEV_ACTIVE) { \
1047+
rte_spinlock_unlock(&priv->reset_ops_lock); \
1048+
DRV_LOG(ERR, "Device reset in progress, " \
1049+
"%s not called", #_func); \
1050+
return; \
1051+
} \
9661052
_func(dev, _arg); \
9671053
rte_spinlock_unlock(&priv->reset_ops_lock); \
9681054
} else { \
@@ -983,6 +1069,12 @@ _func##_lock(struct rte_eth_dev *dev, uint16_t _arg) \
9831069
struct mana_priv *priv = dev->data->dev_private; \
9841070
int ret; \
9851071
if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
1072+
if (rte_atomic_load_explicit(&priv->dev_state, \
1073+
rte_memory_order_acquire) != \
1074+
MANA_DEV_ACTIVE) { \
1075+
rte_spinlock_unlock(&priv->reset_ops_lock); \
1076+
return -EBUSY; \
1077+
} \
9861078
ret = _func(dev, _arg); \
9871079
rte_spinlock_unlock(&priv->reset_ops_lock); \
9881080
} else { \
@@ -1213,31 +1305,41 @@ static int mana_pci_probe(struct rte_pci_driver *pci_drv,
12131305
struct rte_pci_device *pci_dev);
12141306
static void mana_intr_handler(void *arg);
12151307

1216-
/* Timeout for waiting for PORT_ACTIVE after PORT_ERR */
1217-
#define MANA_RESET_TIMEOUT_US (90 * 1000000ULL) /* 90 seconds */
1308+
/* Delay before initiating reset exit after reset enter completes */
1309+
#define MANA_RESET_TIMER_US (10 * 1000000ULL) /* 10 seconds */
12181310

12191311
static void
1220-
mana_reset_timeout(void *arg)
1312+
mana_reset_timer_cb(void *arg)
12211313
{
12221314
struct mana_priv *priv = (struct mana_priv *)arg;
1223-
enum mana_device_state expected = MANA_DEV_RESET_EXIT;
12241315

1225-
/* Use CAS to claim ownership — only one of timeout or
1226-
* mana_reset_exit_delay can transition out of RESET_EXIT.
1316+
/* Try to acquire the lock. If the application is already
1317+
* holding it (e.g. doing dev_stop/dev_close), reschedule
1318+
* the timer to retry later.
12271319
*/
1228-
if (!rte_atomic_compare_exchange_strong_explicit(
1229-
&priv->dev_state, &expected,
1230-
MANA_DEV_RESET_FAILED,
1231-
rte_memory_order_acq_rel,
1232-
rte_memory_order_acquire)) {
1233-
DRV_LOG(DEBUG, "Reset timeout fired but state is %d",
1234-
expected);
1320+
if (!rte_spinlock_trylock(&priv->reset_ops_lock)) {
1321+
DRV_LOG(DEBUG, "Reset timer: lock held, rescheduling");
1322+
if (rte_atomic_load_explicit(&priv->dev_state,
1323+
rte_memory_order_acquire) == MANA_DEV_RESET_EXIT) {
1324+
if (rte_eal_alarm_set(1000000,
1325+
mana_reset_timer_cb, priv))
1326+
DRV_LOG(ERR, "Failed to reschedule reset timer");
1327+
}
12351328
return;
12361329
}
12371330

1238-
DRV_LOG(ERR, "Reset timeout: PORT_ACTIVE not received within %us",
1239-
(unsigned int)(MANA_RESET_TIMEOUT_US / 1000000));
1240-
rte_spinlock_unlock(&priv->reset_ops_lock);
1331+
if (rte_atomic_load_explicit(&priv->dev_state,
1332+
rte_memory_order_acquire) != MANA_DEV_RESET_EXIT) {
1333+
DRV_LOG(DEBUG, "Reset timer fired but not in RESET_EXIT state");
1334+
rte_spinlock_unlock(&priv->reset_ops_lock);
1335+
return;
1336+
}
1337+
1338+
DRV_LOG(INFO, "Reset timer fired, initiating reset exit");
1339+
mana_reset_exit(priv);
1340+
/* Lock is released by mana_reset_exit_delay at the end of
1341+
* the reset exit processing.
1342+
*/
12411343
}
12421344

12431345
static void
@@ -1303,20 +1405,22 @@ mana_reset_enter(struct mana_priv *priv)
13031405
rte_atomic_store_explicit(&priv->dev_state, MANA_DEV_RESET_EXIT,
13041406
rte_memory_order_release);
13051407

1306-
ret = rte_eal_alarm_set(MANA_RESET_TIMEOUT_US,
1307-
mana_reset_timeout, priv);
1408+
ret = rte_eal_alarm_set(MANA_RESET_TIMER_US,
1409+
mana_reset_timer_cb, priv);
13081410
if (ret) {
1309-
DRV_LOG(ERR, "Failed to set reset timeout alarm ret %d", ret);
1310-
DRV_LOG(ERR, "No timeout protection, transitioning to RESET_FAILED");
1411+
DRV_LOG(ERR, "Failed to set reset timer ret %d", ret);
1412+
DRV_LOG(ERR, "Cannot schedule reset exit, transitioning to RESET_FAILED");
13111413
rte_atomic_store_explicit(&priv->dev_state,
13121414
MANA_DEV_RESET_FAILED,
13131415
rte_memory_order_release);
13141416
rte_spinlock_unlock(&priv->reset_ops_lock);
13151417
return;
13161418
}
13171419

1318-
DRV_LOG(DEBUG, "Waiting for reset complete event");
1420+
DRV_LOG(DEBUG, "Reset exit timer scheduled");
13191421

1422+
/* Release the lock so the application can call dev_stop/dev_close */
1423+
rte_spinlock_unlock(&priv->reset_ops_lock);
13201424
return;
13211425

13221426
reset_failed:
@@ -1335,32 +1439,19 @@ mana_reset_exit_delay(void *arg)
13351439

13361440
DRV_LOG(DEBUG, "Delayed mana device reset complete processing");
13371441

1338-
/*
1339-
* Use CAS to verify state is still RESET_EXIT. The alarm is
1340-
* guaranteed cancelled by mana_reset_exit before this thread
1341-
* is created, so timeout cannot race here. The CAS is purely
1342-
* defensive — symmetric with mana_reset_timeout's CAS.
1442+
/* If the app called dev_stop/dev_close during the timer window,
1443+
* state is no longer RESET_EXIT. Nothing to do.
13431444
*/
1344-
enum mana_device_state expected = MANA_DEV_RESET_EXIT;
1345-
if (!rte_atomic_compare_exchange_strong_explicit(
1346-
&priv->dev_state, &expected,
1347-
MANA_DEV_RESET_EXIT,
1348-
rte_memory_order_acq_rel,
1349-
rte_memory_order_acquire)) {
1350-
DRV_LOG(ERR, "Wrong device state %d, exiting", expected);
1351-
/*
1352-
* Timeout or other path already took ownership.
1353-
* Do NOT unlock — the other path already did.
1354-
*/
1445+
if (rte_atomic_load_explicit(&priv->dev_state,
1446+
rte_memory_order_acquire) != MANA_DEV_RESET_EXIT) {
1447+
DRV_LOG(DEBUG, "State is not RESET_EXIT, skipping");
1448+
rte_spinlock_unlock(&priv->reset_ops_lock);
13551449
return ret;
13561450
}
13571451

13581452
dev = &rte_eth_devices[priv->port_id];
13591453
pci_dev = RTE_ETH_DEV_TO_PCI(dev);
13601454

1361-
/* Cancel the timeout alarm to prevent race during reset-exit */
1362-
rte_eal_alarm_cancel(mana_reset_timeout, priv);
1363-
13641455
DRV_LOG(DEBUG, "Resetting dev = %p, priv = %p", dev, priv);
13651456

13661457
ret = ibv_close_device(priv->ib_ctx);
@@ -1495,8 +1586,8 @@ mana_reset_exit(struct mana_priv *priv)
14951586
}
14961587
DRV_LOG(DEBUG, "Entering into device reset complete processing");
14971588

1498-
/* Cancel the reset timeout alarm — PORT_ACTIVE arrived in time */
1499-
rte_eal_alarm_cancel(mana_reset_timeout, priv);
1589+
/* Cancel the reset timer (harmless no-op if already fired) */
1590+
rte_eal_alarm_cancel(mana_reset_timer_cb, priv);
15001591

15011592
rxq_intr_disable(priv);
15021593

@@ -1548,9 +1639,7 @@ mana_intr_handler(void *arg)
15481639
if (dev->data->dev_conf.intr_conf.rmv)
15491640
rte_eth_dev_callback_process(dev,
15501641
RTE_ETH_EVENT_INTR_RMV, NULL);
1551-
break;
15521642

1553-
case IBV_EVENT_PORT_ERR:
15541643
DRV_LOG(INFO, "Device reset event received");
15551644
if (rte_atomic_load_explicit(&priv->dev_state,
15561645
rte_memory_order_acquire) == MANA_DEV_ACTIVE) {
@@ -1561,22 +1650,6 @@ mana_intr_handler(void *arg)
15611650
}
15621651
break;
15631652

1564-
case IBV_EVENT_PORT_ACTIVE:
1565-
DRV_LOG(INFO, "Device reset Complete event received");
1566-
if (rte_atomic_load_explicit(&priv->dev_state,
1567-
rte_memory_order_acquire) == MANA_DEV_RESET_EXIT) {
1568-
mana_reset_exit(priv);
1569-
} else {
1570-
if (rte_atomic_load_explicit(&priv->dev_state,
1571-
rte_memory_order_acquire) == MANA_DEV_ACTIVE)
1572-
DRV_LOG(ERR, "Not in "
1573-
"MANA_DEV_RESET_EXIT state");
1574-
else
1575-
DRV_LOG(ERR, "Still in "
1576-
"MANA_DEV_RESET_ENTER state");
1577-
}
1578-
break;
1579-
15801653
default:
15811654
break;
15821655
}
@@ -1590,6 +1663,9 @@ mana_intr_uninstall(struct mana_priv *priv)
15901663
{
15911664
int ret;
15921665

1666+
if (!priv->intr_handle)
1667+
return 0;
1668+
15931669
ret = rte_intr_callback_unregister(priv->intr_handle,
15941670
mana_intr_handler, priv);
15951671
if (ret <= 0) {
@@ -1598,6 +1674,7 @@ mana_intr_uninstall(struct mana_priv *priv)
15981674
}
15991675

16001676
rte_intr_instance_free(priv->intr_handle);
1677+
priv->intr_handle = NULL;
16011678

16021679
return 0;
16031680
}

drivers/net/mana/rx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ mana_stop_rx_queues(struct rte_eth_dev *dev)
177177

178178
for (i = 0; i < priv->num_queues; i++)
179179
if (dev->data->rx_queue_state[i] == RTE_ETH_QUEUE_STATE_STOPPED)
180-
return -EINVAL;
180+
return 0;
181181

182182
if (priv->rwq_qp) {
183183
ret = ibv_destroy_qp(priv->rwq_qp);

drivers/net/mana/tx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ mana_stop_tx_queues(struct rte_eth_dev *dev)
1818

1919
for (i = 0; i < priv->num_queues; i++)
2020
if (dev->data->tx_queue_state[i] == RTE_ETH_QUEUE_STATE_STOPPED)
21-
return -EINVAL;
21+
return 0;
2222

2323
for (i = 0; i < priv->num_queues; i++) {
2424
struct mana_txq *txq = dev->data->tx_queues[i];

0 commit comments

Comments
 (0)