Skip to content

Commit 5ba0fb4

Browse files
Added pre-upgrade check for defect CSCwt69100
1 parent 07ea2db commit 5ba0fb4

7 files changed

Lines changed: 204 additions & 1 deletion

aci-preupgrade-validation-script.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from textwrap import TextWrapper
2323
from getpass import getpass
2424
from collections import defaultdict, OrderedDict
25-
from datetime import datetime
25+
from datetime import datetime, timedelta
2626
from argparse import ArgumentParser
2727
from itertools import chain
2828
import threading
@@ -6293,6 +6293,40 @@ def multipod_modular_spine_bootscript_check(tversion, fabric_nodes, username, pa
62936293
return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
62946294

62956295

6296+
def _epg_summary_task_start_utc(start_ts):
    """Convert a dbgacEpgSummaryTask ``startTs`` string to a naive UTC datetime.

    ``startTs`` looks like ``2024-01-01T00:00:00.000+00:00``: a wall-clock time
    followed by a UTC offset. The offset is applied so that the result can be
    compared directly with ``datetime.utcnow()``. ``%z`` is intentionally not
    used because it is not supported by ``strptime`` on Python 2.

    Args:
        start_ts: Timestamp string as returned in the object's attributes.

    Returns:
        A naive ``datetime`` expressed in UTC. When no well-formed ``+HH:MM`` /
        ``-HH:MM`` (or ``+HHMM``) suffix is present, the wall-clock time is
        returned unchanged, i.e. it is treated as already being UTC.

    Raises:
        ValueError: If the leading ``YYYY-MM-DDTHH:MM:SS`` part cannot be parsed.
    """
    local_dt = datetime.strptime(start_ts[:19], "%Y-%m-%dT%H:%M:%S")

    # Everything after the seconds field: fractional seconds and the offset.
    suffix = start_ts[19:]
    sign_pos = max(suffix.rfind("+"), suffix.rfind("-"))
    if sign_pos < 0:
        return local_dt

    digits = suffix[sign_pos + 1:].replace(":", "")
    if len(digits) != 4 or not digits.isdigit():
        return local_dt

    offset = timedelta(hours=int(digits[:2]), minutes=int(digits[2:]))
    # local = UTC + offset, therefore UTC = local - offset.
    if suffix[sign_pos] == "+":
        return local_dt - offset
    return local_dt + offset


@check_wrapper(check_title="Stale dbgacEpgSummaryTask Objects")
def stale_epg_summary_task_check(tversion, **kwargs):
    """Flag dbgacEpgSummaryTask objects stuck in ``processing`` for over 24 hours.

    The check only applies when the target version is a 6.1 release up to and
    including 6.1(5e), or a 6.2 release up to and including 6.2(1g).

    Args:
        tversion: Target version object; a falsy value means it was not provided.
        **kwargs: Unused; accepted so the check can be called with the common
            set of check arguments.

    Returns:
        Result with MANUAL when the target version is missing, NA when the
        target version is not affected, FAIL_O listing the DN and startTs of
        every stale task, or PASS when none is found.
    """
    result = PASS
    headers = ["DN", "Start Time"]
    data = []
    recommended_action = "Delete the listed stale dbgacEpgSummaryTask objects to prevent policymgr crash."
    doc_url = "https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#stale-dbgacepgsummarytask-objects"

    if not tversion:
        return Result(result=MANUAL, msg=TVER_MISSING)

    # Last affected release of each affected train, keyed by (major1, major2).
    last_affected_by_train = {
        ("6", "1"): "6.1(5e)",
        ("6", "2"): "6.2(1g)",
    }
    last_affected = last_affected_by_train.get((tversion.major1, tversion.major2))
    version_affected = last_affected is not None and (
        tversion.older_than(last_affected) or tversion.same_as(last_affected)
    )
    if not version_affected:
        return Result(result=NA, msg=VER_NOT_AFFECTED)

    threshold = datetime.utcnow() - timedelta(hours=24)
    for obj in icurl("class", 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'):
        attr = obj["dbgacEpgSummaryTask"]["attributes"]
        dn = attr.get("dn", "")
        start_ts = attr.get("startTs", "")
        try:
            # Normalise to UTC first: startTs carries its own UTC offset while
            # the threshold is computed from utcnow().
            task_dt = _epg_summary_task_start_utc(start_ts)
        except ValueError:
            # Best effort: a task without a parsable start time cannot be aged,
            # so it is not reported.
            continue
        if task_dt < threshold:
            data.append([dn, start_ts])

    if data:
        result = FAIL_O
    return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
6329+
62966330
# ---- Script Execution ----
62976331

62986332

@@ -6462,6 +6496,7 @@ class CheckManager:
64626496
auto_firmware_update_on_switch_check,
64636497
rogue_ep_coop_exception_mac_check,
64646498
n9k_c9408_model_lem_count_check,
6499+
stale_epg_summary_task_check,
64656500
]
64666501
ssh_checks = [
64676502
# General

docs/docs/validations.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ Items | Defect | This Script
198198
[Rogue EP Exception List missing on switches][d30] | CSCwp64296 | :white_check_mark: | :no_entry_sign:
199199
[N9K-C9408 with more than 5 N9K-X9400-16W LEMs][d31] | CSCws82819 | :white_check_mark: | :no_entry_sign:
200200
[Multi-Pod Modular Spine Bootscript File][d32] | CSCwr66848 | :white_check_mark: | :no_entry_sign:
201+
[Stale dbgacEpgSummaryTask Objects][d33] | CSCwt69100 | :white_check_mark: | :no_entry_sign:
201202

202203
[d1]: #ep-announce-compatibility
203204
[d2]: #eventmgr-db-size-defect-susceptibility
@@ -231,6 +232,7 @@ Items | Defect | This Script
231232
[d30]: #rogue-ep-exception-list-missing-on-switches
232233
[d31]: #n9k-c9408-with-more-than-5-n9k-x9400-16w-lems
233234
[d32]: #multi-pod-modular-spine-bootscript-file
235+
[d33]: #stale-dbgacepgsummarytask-objects
234236

235237
## General Check Details
236238

@@ -2753,7 +2755,17 @@ This issue happens only when the target version is specifically 6.1(4h).
27532755
To avoid this issue, change the target version to another version. Or verify that the `bootscript` file exists in the bootflash of each modular spine switch prior to upgrading to 6.1(4h). If the file is missing, you have to do clean reboot on the impacted spine to ensure that `/bootflash/bootscript` gets created again. In case you already upgraded your spine and you are experiencing the traffic impact due to this issue, clean reboot of the spine will restore the traffic.
27542756

27552757

2758+
### Stale dbgacEpgSummaryTask Objects
2759+
2760+
Due to [CSCwt69100][68], a stale `dbgacEpgSummaryTask` object stuck in `processing` state with empty content can cause the policymgr process to crash on all APICs during an upgrade or process restart.
2761+
2762+
Affected target versions: 6.1 releases up to and including 6.1(5e), and 6.2 releases up to and including 6.2(1g).
2763+
2764+
The check queries for `dbgacEpgSummaryTask` objects with `operSt="processing"` and `startTs` older than 24 hours. Such objects are considered stale and unexpected. If found, delete them before proceeding with the upgrade to prevent policymgr from crashing on restart.
2765+
2766+
27562767
[0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script
2768+
[68]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt69100
27572769
[1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html
27582770
[2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html
27592771
[3]: https://www.cisco.com/c/en/us/td/docs/dcn/aci/apic/5x/release-notes/cisco-aci-nx-os-release-notes-1501.html#_Toc140580685
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[
2+
{
3+
"dbgacEpgSummaryTask": {
4+
"attributes": {
5+
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
6+
"operSt": "processing",
7+
"startTs": "2024-01-01T00:00:00.000+00:00"
8+
}
9+
}
10+
},
11+
{
12+
"dbgacEpgSummaryTask": {
13+
"attributes": {
14+
"dn": "action/policymgrsubj-[uni/tn-TN_TEST/epgToEpg-EPG_TEST_A_TO_EPG_TEST_B/dstepg-[uni/tn-TN_TEST/ap-AP_TEST/epg-EPG_TEST_B]]/dbgacEpgSummaryTask-ReportODACDef",
15+
"operSt": "processing",
16+
"startTs": "2026-01-15T11:30:00.000+00:00"
17+
}
18+
}
19+
}
20+
]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[
2+
{
3+
"dbgacEpgSummaryTask": {
4+
"attributes": {
5+
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
6+
"operSt": "processing",
7+
"startTs": "2026-01-15T11:30:00.000+00:00"
8+
}
9+
}
10+
}
11+
]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[
2+
{
3+
"dbgacEpgSummaryTask": {
4+
"attributes": {
5+
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
6+
"operSt": "processing",
7+
"startTs": "2024-01-01T00:00:00.000+00:00"
8+
}
9+
}
10+
}
11+
]
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import os
2+
import pytest
3+
import importlib
4+
from datetime import datetime
5+
from helpers.utils import read_data
6+
7+
script = importlib.import_module("aci-preupgrade-validation-script")
8+
9+
dir = os.path.dirname(os.path.abspath(__file__))
10+
11+
test_function = "stale_epg_summary_task_check"
12+
13+
# icurl query key
14+
task_api = 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'
15+
16+
# Fixed "now" used by mock_datetime fixture: 2026-01-15 12:00:00 UTC
17+
# Stale threshold = 2026-01-14 12:00:00 UTC (24h before fixed now)
18+
# dbgacEpgSummaryTask_stale.json -> startTs 2024-01-01 (way before threshold) -> FAIL_O
19+
# dbgacEpgSummaryTask_recent.json -> startTs 2026-01-15 11:30 UTC (30 min before fixed now) -> PASS
20+
FIXED_NOW = datetime(2026, 1, 15, 12, 0, 0)


class MockDatetime(datetime):
    """Stand-in for the script's ``datetime`` class with a frozen ``utcnow()``.

    It subclasses the real ``datetime`` and overrides only ``utcnow()``, so
    construction, ``strptime`` and every other class method (``now``,
    ``fromtimestamp``, ...) keep working while the patch is active. A bare
    stub exposing only a few methods would raise AttributeError as soon as the
    patched module used anything else on ``datetime``.
    """

    @classmethod
    def utcnow(cls):
        """Return the fixed test clock instead of the real current UTC time."""
        return FIXED_NOW
35+
36+
37+
@pytest.fixture
def mock_datetime(monkeypatch):
    """Swap the script module's ``datetime`` name for MockDatetime during a test.

    The replacement makes ``utcnow()`` inside the script return a fixed
    timestamp; monkeypatch undoes the change when the test finishes.
    """
    frozen_clock = MockDatetime
    monkeypatch.setattr(script, "datetime", frozen_clock)
41+
42+
43+
# Row the check is expected to report for the stale (2024) task: DN and startTs.
_STALE_TASK_DN = "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef"
_STALE_TASK_START_TS = "2024-01-01T00:00:00.000+00:00"


@pytest.mark.parametrize(
    "tversion, icurl_outputs, expected_result, expected_data",
    [
        # Case 1: target 6.2(2a) is newer than both affected boundaries
        # (6.1(5e) and 6.2(1g)), so the version gate returns NA before any
        # API query is made.
        ("6.2(2a)", {}, script.NA, []),
        # Case 2: affected target 6.1(5e), the query returns no
        # dbgacEpgSummaryTask objects. Expected: PASS.
        (
            "6.1(5e)",
            {task_api: read_data(dir, "dbgacEpgSummaryTask_empty.json")},
            script.PASS,
            [],
        ),
        # Case 3: affected target 6.1(5e), one processing task whose startTs is
        # only 30 minutes before the fixed "now" (inside the 24-hour window),
        # so it is not stale. Expected: PASS.
        (
            "6.1(5e)",
            {task_api: read_data(dir, "dbgacEpgSummaryTask_recent.json")},
            script.PASS,
            [],
        ),
        # Case 4: affected target 6.1(5e), one processing task with a startTs
        # from 2024 (far older than 24 hours). Expected: FAIL_O reporting its
        # DN and startTs.
        (
            "6.1(5e)",
            {task_api: read_data(dir, "dbgacEpgSummaryTask_stale.json")},
            script.FAIL_O,
            [[_STALE_TASK_DN, _STALE_TASK_START_TS]],
        ),
        # Case 5: affected target 6.2(1g), two processing tasks, one stale
        # (2024) and one recent. Expected: FAIL_O with only the stale row.
        (
            "6.2(1g)",
            {task_api: read_data(dir, "dbgacEpgSummaryTask_mixed.json")},
            script.FAIL_O,
            [[_STALE_TASK_DN, _STALE_TASK_START_TS]],
        ),
    ],
)
def test_logic(run_check, mock_icurl, mock_datetime, tversion, icurl_outputs, expected_result, expected_data):
    """Run the check for one parametrized scenario and compare result and data.

    ``run_check`` and ``mock_icurl`` are fixtures defined outside this file
    (presumably in the shared conftest); ``mock_datetime`` pins the script's
    ``utcnow()`` so the 24-hour threshold is deterministic.
    """
    outcome = run_check(tversion=script.AciVersion(tversion))
    assert outcome.result == expected_result
    assert outcome.data == expected_data

0 commit comments

Comments
 (0)