Skip to content

Commit 9c151b9

Browse files
authored
Merge pull request #21971 from FRRouting/mergify/bp/stable/10.3/pr-21947
bgpd: cancel LLGR stale timer on peer AF delete (backport #21947)
2 parents 2e0e582 + 7d1a02b commit 9c151b9

5 files changed

Lines changed: 226 additions & 0 deletions

File tree

bgpd/bgpd.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,7 @@ int peer_af_delete(struct peer *peer, afi_t afi, safi_t safi)
925925
bgp_soft_reconfig_table_task_cancel(bgp, bgp->rib[afi][safi], peer);
926926

927927
bgp_stop_announce_route_timer(af);
928+
event_cancel(&peer->t_llgr_stale[afi][safi]);
928929

929930
if (PAF_SUBGRP(af)) {
930931
if (BGP_DEBUG(update_groups, UPDATE_GROUPS))

tests/topotests/bgp_llgr_stale_timer_af_delete/__init__.py

Whitespace-only changes.
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
!
2+
interface r1-eth0
3+
ip address 192.168.255.1/24
4+
!
5+
router bgp 65001
6+
bgp router-id 10.255.1.1
7+
no bgp ebgp-requires-policy
8+
no bgp network import-check
9+
bgp graceful-restart
10+
bgp graceful-restart restart-time 0
11+
bgp long-lived-graceful-restart stale-time 10
12+
neighbor 192.168.255.2 remote-as 65002
13+
neighbor 192.168.255.2 timers 1 3
14+
neighbor 192.168.255.2 timers connect 1
15+
address-family ipv4 unicast
16+
network 10.0.0.1/32
17+
neighbor 192.168.255.2 activate
18+
exit-address-family
19+
!
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
!
2+
interface r2-eth0
3+
ip address 192.168.255.2/24
4+
!
5+
router bgp 65002
6+
bgp router-id 10.255.2.2
7+
no bgp ebgp-requires-policy
8+
bgp graceful-restart
9+
bgp graceful-restart restart-time 0
10+
bgp long-lived-graceful-restart stale-time 10
11+
neighbor 192.168.255.1 remote-as 65001
12+
neighbor 192.168.255.1 timers 1 3
13+
neighbor 192.168.255.1 timers connect 1
14+
address-family ipv4 unicast
15+
neighbor 192.168.255.1 activate
16+
exit-address-family
17+
!
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#!/usr/bin/env python
2+
# SPDX-License-Identifier: ISC
3+
4+
"""
5+
Regression test for stale BGP LLGR timers after peer AF deletion.
6+
7+
The test arms peer->t_llgr_stale[afi][safi], deletes the peer AF that was used
8+
as the timer callback argument, and verifies that bgpd survives beyond the
9+
original timer deadline.
10+
"""
11+
12+
import functools
13+
import json
14+
import os
15+
import sys
16+
import time
17+
18+
import pytest
19+
20+
CWD = os.path.dirname(os.path.realpath(__file__))
21+
sys.path.append(os.path.join(CWD, "../"))
22+
23+
# pylint: disable=C0413
24+
from lib import topotest
25+
from lib.common_config import kill_router_daemons, step
26+
from lib.topogen import Topogen, get_topogen
27+
28+
pytestmark = [pytest.mark.bgpd]
29+
30+
R1_PREFIX = "10.0.0.1/32"
31+
R2_AS = 65002
32+
R2_NEIGHBOR = "192.168.255.1"
33+
LLGR_STALE_TIME = 10
34+
35+
36+
def build_topo(tgen):
    """
    Describe the test topology: routers r1 and r2 attached to switch s1.

    The routers are registered first, then the switch, then one link per
    router (r1 before r2).
    """
    router_names = ["r{}".format(index) for index in (1, 2)]
    for name in router_names:
        tgen.add_router(name)

    shared_switch = tgen.add_switch("s1")
    for name in router_names:
        shared_switch.add_link(tgen.gears[name])
43+
44+
45+
def setup_module(mod):
    """
    Pytest module setup: build the topology, load configs, start the routers.

    Each router loads `<router name>/frr.conf` from the directory holding this
    test file.
    """
    tgen = Topogen(build_topo, mod.__name__)
    tgen.start_topology()

    routers = tgen.routers()
    for rname in routers:
        config_path = os.path.join(CWD, rname, "frr.conf")
        routers[rname].load_frr_config(config_path)

    tgen.start_router()
53+
54+
55+
def teardown_module(_mod):
    """Pytest module teardown: stop the topology created by setup_module."""
    get_topogen().stop_topology()
58+
59+
60+
def _bgpd_alive(router):
    """
    Report whether the bgpd process named in the router's pid file is running.

    The check is done by a shell command run through `router.cmd`: it reads
    /var/run/frr/bgpd.pid and looks for a matching /proc/<pid> directory.

    Returns the stripped command output: "alive" when the process directory
    exists, otherwise an empty string.
    """
    # Read the PID first and insist that it is non-empty. Substituting an
    # empty value straight into `test -d /proc/$(cat ...)` would reduce the
    # check to `test -d /proc/`, which always succeeds and would report a dead
    # bgpd as alive. The trailing `|| true` keeps the exit status at zero.
    liveness_check = (
        "pid=$(cat /var/run/frr/bgpd.pid 2>/dev/null); "
        'test -n "$pid" && test -d "/proc/$pid" && echo alive || true'
    )
    return router.cmd(liveness_check).strip()
65+
66+
67+
def _neighbor_json(router, neighbor):
    """
    Return the JSON object vtysh reports for one BGP neighbor.

    Runs `show ip bgp neighbor <neighbor> json` on the router and returns the
    value stored under the neighbor's address.

    An empty dict is returned when that key is absent, and also when the
    output is empty, is not valid JSON, or is not a JSON object. Callers poll
    this helper, so unusable output is treated as "no data yet" instead of
    raising out of the polling loop.
    """
    output = router.vtysh_cmd("show ip bgp neighbor {} json".format(neighbor))
    try:
        data = json.loads(output)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        return {}
    if not isinstance(data, dict):
        return {}
    return data.get(neighbor, {})
70+
71+
72+
def _prefix_json(router, prefix):
    """
    Return the JSON object vtysh reports for one BGP prefix.

    Runs `show ip bgp <prefix> json` on the router and returns the decoded
    object.

    An empty dict is returned when the output is empty, is not valid JSON, or
    is not a JSON object. Callers poll this helper and call `.get` on the
    result, so unusable output is treated as "no data yet" instead of raising
    out of the polling loop.
    """
    output = router.vtysh_cmd("show ip bgp {} json".format(prefix))
    try:
        data = json.loads(output)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        return {}
    return data if isinstance(data, dict) else {}
75+
76+
77+
def _route_observation(router, prefix):
    """
    Summarise what a router currently reports for a prefix.

    Only the first entry of the "paths" list is inspected. The returned dict
    holds:
    - "bgpdAlive": result of _bgpd_alive() for the router
    - "present": True when at least one path exists
    - "stale": the first path's "stale" value, or None
    - "llgrSecondsRemaining": the first path's value, or None
    - "community": the first path's community "string" value, or None
    """
    route = _prefix_json(router, prefix)
    path_list = route.get("paths", [])

    head = {}
    if path_list:
        head = path_list[0]
    community = head.get("community", {})

    observation = {"bgpdAlive": _bgpd_alive(router)}
    observation["present"] = bool(path_list)
    observation["stale"] = head.get("stale")
    observation["llgrSecondsRemaining"] = head.get("llgrSecondsRemaining")
    observation["community"] = community.get("string")
    return observation
90+
91+
92+
def test_bgp_llgr_stale_timer_cancelled_on_peer_af_delete():
    """
    Check that removing a peer AF also cancels its pending LLGR stale timer.

    Scenario, observed from r2:
    - r1's bgpd is killed, after which r2 must show R1_PREFIX as stale with
      the llgr-stale community and an LLGR countdown still running.
    - `no neighbor ... activate` is issued on r2 while that countdown runs,
      which deletes and frees the peer_af.
    - r2's bgpd must still be alive after the original countdown has elapsed.

    Failure mode guarded against: peer->t_llgr_stale[afi][safi] is armed with
    the struct peer_af as its callback argument, so a timer left running
    after the deactivation would dereference freed memory when it fires.
    """
    tgen = get_topogen()
    if tgen.routers_have_failure():
        pytest.skip(tgen.errors)

    r2 = tgen.gears["r2"]

    def _poll_until_clear(check, attempts):
        # Poll `check` through run_and_expect (expected value None, one second
        # between tries) and fail with whatever it last returned.
        _, outcome = topotest.run_and_expect(check, None, count=attempts, wait=1)
        assert outcome is None, outcome

    def _r2_bgp_established():
        neighbor = _neighbor_json(r2, R2_NEIGHBOR)
        return None if neighbor.get("bgpState") == "Established" else neighbor

    def _r2_has_prefix():
        obs = _route_observation(r2, R1_PREFIX)
        if obs["present"] and not obs["stale"]:
            return None
        return obs

    def _r2_llgr_timer_running():
        obs = _route_observation(r2, R1_PREFIX)
        seconds_left = obs["llgrSecondsRemaining"]
        # Require at least two seconds of countdown so the AF deletion below
        # lands before the timer would have fired.
        armed = (
            obs["present"]
            and obs["stale"] is True
            and obs["community"] == "llgr-stale"
            and seconds_left is not None
            and seconds_left >= 2
        )
        return None if armed else obs

    step("Wait for R2 BGP to establish")
    _poll_until_clear(_r2_bgp_established, 60)

    step("Wait for R2 to learn R1 prefix")
    _poll_until_clear(_r2_has_prefix, 60)

    step("Stop R1 bgpd so R2 arms the LLGR stale timer")
    kill_router_daemons(tgen, "r1", ["bgpd"])
    _poll_until_clear(_r2_llgr_timer_running, 40)

    step("Delete R2 IPv4 peer AF before the LLGR stale timer expires")
    deactivate_commands = (
        "\n"
        "configure terminal\n"
        "router bgp {}\n"
        "address-family ipv4 unicast\n"
        "no neighbor {} activate\n"
    ).format(R2_AS, R2_NEIGHBOR)
    r2.vtysh_cmd(deactivate_commands)

    # Measured from the deletion, so it is later than the real timer expiry.
    deadline = time.monotonic() + LLGR_STALE_TIME + 2

    def _r2_bgpd_alive_after_stale_timer_deadline():
        if _bgpd_alive(r2) != "alive":
            return "r2 bgpd is not alive"

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return None
        return "waiting {:.1f}s for stale LLGR timer deadline".format(remaining)

    step("Verify R2 bgpd stays alive past the stale LLGR timer deadline")
    _poll_until_clear(_r2_bgpd_alive_after_stale_timer_deadline, LLGR_STALE_TIME + 10)
185+
186+
187+
if __name__ == "__main__":
    # Run this file directly under pytest; "-s" is placed ahead of any
    # arguments given on the command line.
    sys.exit(pytest.main(["-s"] + sys.argv[1:]))

0 commit comments

Comments
 (0)