Skip to content

Commit cc54d9b

Browse files
authored
Fixed Junos reboots not being detected when waiting for the device to reload (#394)
1 parent 508cf9b commit cc54d9b

3 files changed

Lines changed: 143 additions & 34 deletions

File tree

changes/394.fixed

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixed Junos reboots not being detected when waiting for the device to reload.
2+
Increased the default Junos reboot wait timeout from 1 hour to 2 hours.

pyntc/devices/jnpr_device.py

Lines changed: 70 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -228,18 +228,53 @@ def _uptime_to_string(self, uptime_full_string):
228228
days, hours, minutes, seconds = self._uptime_components(uptime_full_string)
229229
return f"{days:02d}:{hours:02d}:{minutes:02d}:{seconds:02d}"
230230

231-
def _wait_for_device_reboot(self, timeout=3600):
231+
def _wait_for_device_reboot(self, original_uptime, timeout=7200):
232+
"""Block until the device reboots and accepts a fresh connection.
233+
234+
Drops the existing NETCONF session and polls for the device to come back.
235+
The reboot is considered complete when a new connection succeeds and the
236+
device reports an uptime lower than ``original_uptime`` (i.e., it has
237+
booted since the reboot was issued).
238+
239+
The pre-reboot session must be discarded first: once the device restarts,
240+
PyEZ still reports it as connected even though the transport is dead, so it
241+
is closed here to force each probe to establish a fresh connection.
242+
243+
Args:
244+
original_uptime (int): Device uptime in seconds captured before the reboot.
245+
timeout (int, optional): Max seconds to wait for the device to return. Defaults to 2 hours.
246+
"""
232247
start = time.time()
233-
disconnected = False
248+
249+
# Drop the pre-reboot NETCONF session so subsequent probes can't read from
250+
# a stale connection PyEZ still reports as connected.
251+
try:
252+
self.close()
253+
except Exception as close_exc: # pylint: disable=broad-exception-caught
254+
log.debug("Host %s: Pre-reboot disconnect raised %s (ignored).", self.host, close_exc)
255+
234256
while time.time() - start < timeout:
235-
if disconnected:
236-
try:
237-
self.open()
257+
try:
258+
self.open()
259+
self._uptime = None
260+
current_uptime = self.uptime
261+
if current_uptime is not None and current_uptime < original_uptime:
262+
log.info(
263+
"Host %s: Device rebooted (uptime %ss < pre-reboot %ss).",
264+
self.host,
265+
current_uptime,
266+
original_uptime,
267+
)
238268
return
239-
except: # noqa E722 # nosec # pylint: disable=bare-except
240-
pass
241-
elif not self.connected:
242-
disconnected = True
269+
log.debug(
270+
"Host %s: Reachable but uptime %ss >= pre-reboot %ss; still waiting.",
271+
self.host,
272+
current_uptime,
273+
original_uptime,
274+
)
275+
except Exception as exc: # pylint: disable=broad-exception-caught
276+
log.debug("Host %s: Reboot probe failed (%s); will retry.", self.host, exc)
277+
self.native.connected = False
243278
time.sleep(10)
244279

245280
raise RebootTimeoutError(hostname=self.hostname, wait_time=timeout)
@@ -318,12 +353,14 @@ def uptime(self):
318353
Returns:
319354
(int): Device uptime in seconds.
320355
"""
321-
try:
322-
native_uptime_string = self.native.facts["RE0"]["up_time"]
323-
except (AttributeError, TypeError):
324-
native_uptime_string = None
325-
326356
if self._uptime is None:
357+
try:
358+
# Bust PyEZ's cached facts so a cold cache always reflects the live device.
359+
self.native.facts_refresh(keys="RE0")
360+
native_uptime_string = self.native.facts["RE0"]["up_time"]
361+
except (AttributeError, TypeError, KeyError):
362+
native_uptime_string = None
363+
327364
if native_uptime_string is not None:
328365
self._uptime = self._uptime_to_seconds(native_uptime_string)
329366

@@ -337,13 +374,16 @@ def uptime_string(self):
337374
Returns:
338375
(str): Device uptime.
339376
"""
340-
try:
341-
native_uptime_string = self.native.facts["RE0"]["up_time"]
342-
except (AttributeError, TypeError):
343-
native_uptime_string = None
344-
345377
if self._uptime_string is None:
346-
self._uptime_string = self._uptime_to_string(native_uptime_string)
378+
try:
379+
# Bust PyEZ's cached facts so a cold cache always reflects the live device.
380+
self.native.facts_refresh(keys="RE0")
381+
native_uptime_string = self.native.facts["RE0"]["up_time"]
382+
except (AttributeError, TypeError, KeyError):
383+
native_uptime_string = None
384+
385+
if native_uptime_string is not None:
386+
self._uptime_string = self._uptime_to_string(native_uptime_string)
347387

348388
return self._uptime_string
349389

@@ -505,13 +545,13 @@ def open(self):
505545
if not self.connected:
506546
self.native.open()
507547

508-
def reboot(self, wait_for_reload=False, timeout=3600, confirm=None):
548+
def reboot(self, wait_for_reload=False, timeout=7200, confirm=None):
509549
"""
510550
Reload the controller or controller pair.
511551
512552
Args:
513553
wait_for_reload (bool): Whether the reboot method should wait for the device to come back up before returning. Defaults to False.
514-
timeout (int, optional): Time in seconds to wait for the device to return after reboot. Defaults to 1 hour.
554+
timeout (int, optional): Time in seconds to wait for the device to return after reboot. Defaults to 2 hours.
515555
confirm (None): Not used. Deprecated since v0.17.0.
516556
517557
Example:
@@ -522,9 +562,16 @@ def reboot(self, wait_for_reload=False, timeout=3600, confirm=None):
522562
if confirm is not None:
523563
warnings.warn("Passing 'confirm' to reboot method is deprecated.", DeprecationWarning)
524564

565+
self._uptime = None
566+
original_uptime = self.uptime
567+
if original_uptime is None:
568+
raise CommandError(
569+
command="reboot",
570+
message="Could not determine pre-reboot uptime; refusing to wait for reload.",
571+
)
525572
self.sw.reboot(in_min=0)
526573
if wait_for_reload:
527-
self._wait_for_device_reboot(timeout=timeout)
574+
self._wait_for_device_reboot(original_uptime, timeout=timeout)
528575

529576
def rollback(self, filename):
530577
"""Rollback to a specific configuration file.

tests/unit/test_devices/test_jnpr_device.py

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -244,18 +244,28 @@ def test_reboot(self):
244244

245245
@mock.patch("pyntc.devices.jnpr_device.time.sleep")
246246
def test_wait_for_device_to_reboot(self, mock_sleep):
247-
with mock.patch.object(self.device, "open") as mock_open:
248-
# Emulate the device disconnected and reconnecting
249-
type(self.device.native).connected = mock.PropertyMock(side_effect=[True, False, True])
250-
mock_open.side_effect = [Exception, Exception, True]
251-
self.device.reboot(wait_for_reload=True, timeout=3)
252-
mock_open.assert_has_calls([mock.call()] * 3)
247+
"""Reboot completes when the device returns with a lower uptime than the baseline."""
248+
with (
249+
mock.patch.object(self.device, "open") as mock_open,
250+
mock.patch.object(self.device, "close"),
251+
mock.patch.object(type(self.device), "uptime", new_callable=mock.PropertyMock) as mock_uptime,
252+
):
253+
# First read is the pre-reboot baseline; second is the post-reboot uptime.
254+
mock_uptime.side_effect = [455, 30]
255+
self.device.reboot(wait_for_reload=True, timeout=30)
256+
257+
self.device.sw.reboot.assert_called_with(in_min=0)
258+
mock_open.assert_called()
253259

254260
@mock.patch("pyntc.devices.jnpr_device.time.sleep")
255261
def test_wait_for_device_to_reboot_error(self, mock_sleep):
256-
with mock.patch.object(self.device, "open") as mock_open:
257-
type(self.device.native).connected = mock.PropertyMock(side_effect=[True, False])
258-
mock_open.side_effect = Exception
262+
"""Raise RebootTimeoutError when the device never reports a lower uptime within the timeout."""
263+
with (
264+
mock.patch.object(self.device, "open"),
265+
mock.patch.object(self.device, "close"),
266+
mock.patch.object(type(self.device), "uptime", new_callable=mock.PropertyMock) as mock_uptime,
267+
):
268+
mock_uptime.return_value = 455
259269
with pytest.raises(RebootTimeoutError):
260270
self.device.reboot(wait_for_reload=True, timeout=1)
261271

@@ -290,12 +300,62 @@ def test_checkpoint(self, mock_scp):
290300
self.device.show.assert_called_with("show config")
291301

292302
def test_uptime(self):
303+
"""Cold cache (_uptime is None) refreshes facts and parses the uptime."""
304+
self.assertIsNone(self.device._uptime)
305+
uptime = self.device.uptime
306+
self.assertEqual(uptime, 455)
307+
self.device.native.facts_refresh.assert_called_once_with(keys="RE0")
308+
309+
def test_uptime_cached(self):
310+
"""A populated cache is returned as-is, with no device round-trip."""
311+
self.device._uptime = 1234
293312
uptime = self.device.uptime
294-
assert uptime == 455
313+
self.assertEqual(uptime, 1234)
314+
self.device.native.facts_refresh.assert_not_called()
315+
316+
def test_uptime_refreshes_after_cache_cleared(self):
317+
"""Clearing the cache forces a fresh read."""
318+
self.assertEqual(self.device.uptime, 455)
319+
320+
self.device._uptime = None
321+
self.device.native.facts = {"RE0": {"up_time": "30 seconds"}}
322+
323+
self.assertEqual(self.device.uptime, 30)
324+
325+
def test_uptime_none_when_facts_unavailable(self):
326+
"""Missing/unavailable facts return None gracefully instead of raising an Exception."""
327+
self.device._uptime = None
328+
self.device.native.facts = {}
329+
self.assertIsNone(self.device.uptime)
295330

296331
def test_uptime_string(self):
332+
"""Cold cache (_uptime_string is None) refreshes facts and formats the uptime."""
333+
self.assertIsNone(self.device._uptime_string)
334+
uptime_string = self.device.uptime_string
335+
self.assertEqual(uptime_string, "00:00:07:35")
336+
self.device.native.facts_refresh.assert_called_once_with(keys="RE0")
337+
338+
def test_uptime_string_cached(self):
339+
"""A populated cache is returned as-is, with no device round-trip."""
340+
self.device._uptime_string = "01:02:03:04"
297341
uptime_string = self.device.uptime_string
298-
assert uptime_string == "00:00:07:35"
342+
self.assertEqual(uptime_string, "01:02:03:04")
343+
self.device.native.facts_refresh.assert_not_called()
344+
345+
def test_uptime_string_refreshes_after_cache_cleared(self):
346+
"""Clearing the cache forces a fresh read."""
347+
self.assertEqual(self.device.uptime_string, "00:00:07:35")
348+
349+
self.device._uptime_string = None
350+
self.device.native.facts = {"RE0": {"up_time": "30 seconds"}}
351+
352+
self.assertEqual(self.device.uptime_string, "00:00:00:30")
353+
354+
def test_uptime_string_none_when_facts_unavailable(self):
355+
"""Missing/unavailable facts return None gracefully instead of raising an Exception."""
356+
self.device._uptime_string = None
357+
self.device.native.facts = {}
358+
self.assertIsNone(self.device.uptime_string)
299359

300360
def test_vendor(self):
301361
vendor = self.device.vendor

0 commit comments

Comments
 (0)