Skip to content

Commit 59e6512

Browse files
committed
fix(controller): reconcile stuck "modified" config status on checksum request
When a device applies a new configuration but fails to report its status back to the controller (e.g., due to a transient HTTP 502 error during server maintenance), the config remains in "modified" state indefinitely. The device's agent compares its local checksum with the remote checksum on subsequent polling cycles and finds they match, so it won't re-download or re-report. The status stays "modified" even though the device already has the current configuration. Fix: When a device with "modified" status requests its checksum and enough time has passed since the config was last modified (grace period of 5 minutes, configurable via _STATUS_RECONCILE_GRACE_SECONDS), reconcile the status to "applied". The grace period ensures we don't prematurely reconcile during the normal apply cycle. This fix is backward compatible — it requires no changes to the openwisp-config agent on the device side.
1 parent 0d17acd commit 59e6512

File tree

2 files changed

+109
-0
lines changed

2 files changed

+109
-0
lines changed

openwisp_controller/config/controller/views.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,13 @@ class DeviceChecksumView(UpdateLastIpMixin, GetDeviceView):
144144
returns device's configuration checksum
145145
"""
146146

147+
# Grace period before reconciling a "modified" status.
148+
# If the config has been in "modified" state for longer than this,
149+
# and the device is actively requesting its checksum (proving it's
150+
# online and polling), we assume a previous report_status call was
151+
# lost due to a transient error and reconcile the status to "applied".
152+
_STATUS_RECONCILE_GRACE_SECONDS = 300 # 5 minutes
153+
147154
def get(self, request, pk):
148155
device = self.get_device()
149156
bad_request = forbid_unallowed(request, "GET", "key", device.key)
@@ -156,10 +163,55 @@ def get(self, request, pk):
156163
checksum_requested.send(
157164
sender=device.__class__, instance=device, request=request
158165
)
166+
self._reconcile_modified_status(device)
159167
return ControllerResponse(
160168
device.config.get_cached_checksum(), content_type="text/plain"
161169
)
162170

171+
@staticmethod
172+
def _reconcile_modified_status(device):
173+
"""
174+
Reconciles config status for devices stuck in "modified" state.
175+
176+
When a device applies a new configuration but fails to report its
177+
status back (e.g., due to a transient HTTP error like 502), the
178+
config remains in "modified" state on the controller even though
179+
the device already has the current configuration.
180+
181+
The device's agent will compare its local checksum with the
182+
remote checksum on the next polling cycle and find they match,
183+
so it won't re-download or re-report. The status stays "modified"
184+
indefinitely.
185+
186+
This method detects this condition: if the config has been in
187+
"modified" state for longer than the grace period and the device
188+
is actively polling (proven by this very checksum request), we
189+
set the status to "applied".
190+
"""
191+
from django.utils import timezone
192+
193+
config = device.config
194+
if config.status != "modified":
195+
return
196+
grace = DeviceChecksumView._STATUS_RECONCILE_GRACE_SECONDS
197+
elapsed = (timezone.now() - config.modified).total_seconds()
198+
if elapsed < grace:
199+
return
200+
try:
201+
config.set_status_applied()
202+
logger.info(
203+
"Reconciled config status for device %s: was 'modified' "
204+
"for %d seconds with device actively polling, "
205+
"setting to 'applied'.",
206+
device,
207+
int(elapsed),
208+
)
209+
except Exception:
210+
logger.exception(
211+
"Failed to reconcile config status for device %s",
212+
device,
213+
)
214+
163215
@cache_memoize(
164216
timeout=Config._CHECKSUM_CACHE_TIMEOUT, args_rewrite=get_device_args_rewrite
165217
)

openwisp_controller/config/tests/test_controller.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,63 @@ def test_device_checksum_requested_signal_is_emitted(self):
270270
request=response.wsgi_request,
271271
)
272272

273+
def test_device_checksum_reconciles_modified_status(self):
274+
"""
275+
When a device with status "modified" requests its checksum,
276+
and enough time has passed (grace period), the status should
277+
be automatically reconciled to "applied".
278+
"""
279+
from unittest.mock import PropertyMock
280+
281+
d = self._create_device_config()
282+
c = d.config
283+
c.set_status_modified()
284+
self.assertEqual(c.status, "modified")
285+
url = reverse("controller:device_checksum", args=[d.pk])
286+
287+
# First request within grace period: status should stay "modified"
288+
response = self.client.get(url, {"key": d.key})
289+
self.assertEqual(response.status_code, 200)
290+
c.refresh_from_db()
291+
self.assertEqual(c.status, "modified")
292+
293+
# Simulate that grace period has elapsed by backdating the modified timestamp
294+
from django.utils import timezone
295+
from datetime import timedelta
296+
297+
Config.objects.filter(pk=c.pk).update(
298+
modified=timezone.now() - timedelta(seconds=600)
299+
)
300+
301+
# Second request after grace period: status should be reconciled to "applied"
302+
response = self.client.get(url, {"key": d.key})
303+
self.assertEqual(response.status_code, 200)
304+
c.refresh_from_db()
305+
self.assertEqual(c.status, "applied")
306+
307+
def test_device_checksum_no_reconcile_for_applied(self):
308+
"""Status "applied" should not be changed."""
309+
d = self._create_device_config()
310+
c = d.config
311+
c.set_status_applied()
312+
url = reverse("controller:device_checksum", args=[d.pk])
313+
response = self.client.get(url, {"key": d.key})
314+
self.assertEqual(response.status_code, 200)
315+
c.refresh_from_db()
316+
self.assertEqual(c.status, "applied")
317+
318+
def test_device_checksum_no_reconcile_within_grace_period(self):
319+
"""Status should not be reconciled if within the grace period."""
320+
d = self._create_device_config()
321+
c = d.config
322+
c.set_status_modified()
323+
url = reverse("controller:device_checksum", args=[d.pk])
324+
response = self.client.get(url, {"key": d.key})
325+
self.assertEqual(response.status_code, 200)
326+
c.refresh_from_db()
327+
self.assertEqual(c.status, "modified")
328+
329+
273330
def test_device_checksum_bad_uuid(self):
274331
d = self._create_device_config()
275332
pk = "{}-wrong".format(d.pk)

0 commit comments

Comments
 (0)