Skip to content

Commit 9fe8d7d

Browse files
committed
net/netvsc: handle VF recovery events for service reset
Register callbacks for RTE_ETH_EVENT_ERR_RECOVERING, RTE_ETH_EVENT_RECOVERY_SUCCESS, and RTE_ETH_EVENT_RECOVERY_FAILED events on the VF port to handle MANA service resets.

- On ERR_RECOVERING: defer data path switch to synthetic via rte_eal_alarm_set, keeping VF attached in DPDK
- On RECOVERY_SUCCESS: defer data path switch back to VF
- On RECOVERY_FAILED: do full VF removal (same as INTR_RMV)
- Unregister all recovery callbacks and cancel pending alarms during detach, removal, and close

All recovery callbacks defer work via rte_eal_alarm_set, consistent with the existing INTR_RMV pattern, to avoid cross-driver lock-order assumptions in event-callback context.

This ensures that during a service reset (kernel suspend/resume without PCI remove), netvsc keeps the VF attached and seamlessly switches back to it after recovery, without requiring a PCI hot-add event. This change is compatible with the current behavior when no service reset messages are received.

Signed-off-by: Long Li <longli@microsoft.com>
1 parent aa1b1e6 commit 9fe8d7d

1 file changed

Lines changed: 165 additions & 0 deletions

File tree

drivers/net/netvsc/hn_vf.c

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,13 @@ static int hn_vf_match(const struct rte_eth_dev *dev)
5050
}
5151

5252

53+
/*
 * Forward declarations of the VF recovery event callbacks.
 * They are passed to rte_eth_dev_callback_register() in hn_vf_attach(),
 * which appears before their definitions later in this file.
 */
static int hn_eth_recovering_callback(uint16_t port_id,
54+
enum rte_eth_event_type event, void *cb_arg, void *out);
55+
static int hn_eth_recovery_success_callback(uint16_t port_id,
56+
enum rte_eth_event_type event, void *cb_arg, void *out);
57+
static int hn_eth_recovery_failed_callback(uint16_t port_id,
58+
enum rte_eth_event_type event, void *cb_arg, void *out);
59+
5360
/*
5461
* Attach new PCI VF device and return the port_id
5562
*/
@@ -111,7 +118,56 @@ static int hn_vf_attach(struct rte_eth_dev *dev, struct hn_data *hv)
111118
return ret;
112119
}
113120

121+
/* Register recovery event callbacks for service reset handling */
122+
ret = rte_eth_dev_callback_register(hv->vf_ctx.vf_port,
123+
RTE_ETH_EVENT_ERR_RECOVERING,
124+
hn_eth_recovering_callback, hv);
125+
if (ret) {
126+
PMD_DRV_LOG(ERR,
127+
"Registering recovering callback failed for vf port %d ret %d",
128+
port, ret);
129+
goto err_recovering;
130+
}
131+
132+
ret = rte_eth_dev_callback_register(hv->vf_ctx.vf_port,
133+
RTE_ETH_EVENT_RECOVERY_SUCCESS,
134+
hn_eth_recovery_success_callback, hv);
135+
if (ret) {
136+
PMD_DRV_LOG(ERR,
137+
"Registering recovery success callback failed for vf port %d ret %d",
138+
port, ret);
139+
goto err_recovery_success;
140+
}
141+
142+
ret = rte_eth_dev_callback_register(hv->vf_ctx.vf_port,
143+
RTE_ETH_EVENT_RECOVERY_FAILED,
144+
hn_eth_recovery_failed_callback, hv);
145+
if (ret) {
146+
PMD_DRV_LOG(ERR,
147+
"Registering recovery failed callback failed for vf port %d ret %d",
148+
port, ret);
149+
goto err_recovery_failed;
150+
}
151+
114152
return 0;
153+
154+
err_recovery_failed:
155+
rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
156+
RTE_ETH_EVENT_RECOVERY_SUCCESS,
157+
hn_eth_recovery_success_callback, hv);
158+
err_recovery_success:
159+
rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
160+
RTE_ETH_EVENT_ERR_RECOVERING,
161+
hn_eth_recovering_callback, hv);
162+
err_recovering:
163+
rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
164+
RTE_ETH_EVENT_INTR_RMV,
165+
hn_eth_rmv_event_callback, hv);
166+
hv->vf_ctx.vf_attached = false;
167+
hv->vf_ctx.vf_port = 0;
168+
if (rte_eth_dev_owner_unset(port, hv->owner.id) < 0)
169+
PMD_DRV_LOG(ERR, "Failed to unset owner for port %d", port);
170+
return ret;
115171
}
116172

117173
static void hn_vf_remove_unlocked(struct hn_data *hv);
@@ -143,6 +199,12 @@ static void hn_remove_delayed(void *args)
143199
PMD_DRV_LOG(ERR,
144200
"rte_eth_dev_callback_unregister failed ret=%d",
145201
ret);
202+
rte_eth_dev_callback_unregister(port_id, RTE_ETH_EVENT_ERR_RECOVERING,
203+
hn_eth_recovering_callback, hv);
204+
rte_eth_dev_callback_unregister(port_id, RTE_ETH_EVENT_RECOVERY_SUCCESS,
205+
hn_eth_recovery_success_callback, hv);
206+
rte_eth_dev_callback_unregister(port_id, RTE_ETH_EVENT_RECOVERY_FAILED,
207+
hn_eth_recovery_failed_callback, hv);
146208

147209
/* Detach and release port_id from system */
148210
ret = rte_eth_dev_stop(port_id);
@@ -187,6 +249,89 @@ int hn_eth_rmv_event_callback(uint16_t port_id,
187249
return 0;
188250
}
189251

252+
/*
253+
* Deferred handler for VF error recovery event.
254+
* Switch data path to synthetic but keep the VF attached.
255+
*/
256+
static void hn_recovering_delayed(void *args)
257+
{
258+
struct hn_data *hv = args;
259+
260+
rte_rwlock_write_lock(&hv->vf_lock);
261+
hn_vf_remove_unlocked(hv);
262+
rte_rwlock_write_unlock(&hv->vf_lock);
263+
}
264+
265+
static int
266+
hn_eth_recovering_callback(uint16_t port_id,
267+
enum rte_eth_event_type event __rte_unused,
268+
void *cb_arg, void *out __rte_unused)
269+
{
270+
struct hn_data *hv = cb_arg;
271+
272+
PMD_DRV_LOG(NOTICE, "VF port %u recovering from error", port_id);
273+
rte_eal_alarm_set(1, hn_recovering_delayed, hv);
274+
275+
return 0;
276+
}
277+
278+
/*
279+
* Deferred handler for VF recovery success event.
280+
* Switch data path back to VF.
281+
*/
282+
static void hn_recovery_success_delayed(void *args)
283+
{
284+
struct hn_data *hv = args;
285+
struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
286+
int ret;
287+
288+
rte_rwlock_write_lock(&hv->vf_lock);
289+
/* Only switch data path to VF if the netvsc device is started,
290+
* mirroring the check in hn_vf_add_unlocked. If the device was
291+
* stopped during recovery, defer to hn_vf_start().
292+
*/
293+
if (dev->data->dev_started &&
294+
hv->vf_ctx.vf_attached && !hv->vf_ctx.vf_vsc_switched) {
295+
ret = hn_nvs_set_datapath(hv, NVS_DATAPATH_VF);
296+
if (ret)
297+
PMD_DRV_LOG(ERR,
298+
"Failed to switch to VF after recovery");
299+
else
300+
hv->vf_ctx.vf_vsc_switched = true;
301+
}
302+
rte_rwlock_write_unlock(&hv->vf_lock);
303+
}
304+
305+
static int
306+
hn_eth_recovery_success_callback(uint16_t port_id,
307+
enum rte_eth_event_type event __rte_unused,
308+
void *cb_arg, void *out __rte_unused)
309+
{
310+
struct hn_data *hv = cb_arg;
311+
312+
PMD_DRV_LOG(NOTICE, "VF port %u recovery succeeded", port_id);
313+
rte_eal_alarm_set(1, hn_recovery_success_delayed, hv);
314+
315+
return 0;
316+
}
317+
318+
/*
319+
* Handle VF recovery failure event from MANA PMD.
320+
* VF is unusable, do full removal.
321+
*/
322+
static int
323+
hn_eth_recovery_failed_callback(uint16_t port_id,
324+
enum rte_eth_event_type event __rte_unused,
325+
void *cb_arg, void *out __rte_unused)
326+
{
327+
struct hn_data *hv = cb_arg;
328+
329+
PMD_DRV_LOG(NOTICE, "VF port %u recovery failed, removing", port_id);
330+
rte_eal_alarm_set(1, hn_remove_delayed, hv);
331+
332+
return 0;
333+
}
334+
190335
static int hn_setup_vf_queues(int port, struct rte_eth_dev *dev)
191336
{
192337
struct hn_rx_queue *rx_queue;
@@ -247,6 +392,12 @@ static void hn_vf_detach(struct hn_data *hv)
247392

248393
rte_eth_dev_callback_unregister(port, RTE_ETH_EVENT_INTR_RMV,
249394
hn_eth_rmv_event_callback, hv);
395+
rte_eth_dev_callback_unregister(port, RTE_ETH_EVENT_ERR_RECOVERING,
396+
hn_eth_recovering_callback, hv);
397+
rte_eth_dev_callback_unregister(port, RTE_ETH_EVENT_RECOVERY_SUCCESS,
398+
hn_eth_recovery_success_callback, hv);
399+
rte_eth_dev_callback_unregister(port, RTE_ETH_EVENT_RECOVERY_FAILED,
400+
hn_eth_recovery_failed_callback, hv);
250401

251402
if (rte_eth_dev_owner_unset(port, hv->owner.id) < 0)
252403
PMD_DRV_LOG(ERR, "Failed to unset owner for port %d", port);
@@ -630,7 +781,21 @@ int hn_vf_close(struct rte_eth_dev *dev)
630781
RTE_ETH_EVENT_INTR_RMV,
631782
hn_eth_rmv_event_callback,
632783
hv);
784+
rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
785+
RTE_ETH_EVENT_ERR_RECOVERING,
786+
hn_eth_recovering_callback,
787+
hv);
788+
rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
789+
RTE_ETH_EVENT_RECOVERY_SUCCESS,
790+
hn_eth_recovery_success_callback,
791+
hv);
792+
rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
793+
RTE_ETH_EVENT_RECOVERY_FAILED,
794+
hn_eth_recovery_failed_callback,
795+
hv);
633796
rte_eal_alarm_cancel(hn_remove_delayed, hv);
797+
rte_eal_alarm_cancel(hn_recovering_delayed, hv);
798+
rte_eal_alarm_cancel(hn_recovery_success_delayed, hv);
634799
ret = rte_eth_dev_close(hv->vf_ctx.vf_port);
635800
hv->vf_ctx.vf_attached = false;
636801
}

0 commit comments

Comments
 (0)