Skip to content

Commit a66b043

Browse files
Added pre-upgrade check for defect CSCwt69100
1 parent 0d28b80 commit a66b043

7 files changed

Lines changed: 206 additions & 3 deletions

aci-preupgrade-validation-script.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from textwrap import TextWrapper
2323
from getpass import getpass
2424
from collections import defaultdict, OrderedDict
25-
from datetime import datetime
25+
from datetime import datetime, timedelta
2626
from argparse import ArgumentParser
2727
from itertools import chain
2828
import threading
@@ -6410,6 +6410,40 @@ def svccore_excessive_data_check(**kwargs):
64106410
return Result(result=ERROR, msg="Error occurred while fetching svccore object counts: {}".format(str(e)), doc_url=doc_url)
64116411

64126412

6413+
def _epg_summary_task_start_utc(start_ts):
    """Convert a `startTs` attribute value into a naive UTC datetime.

    The value is expected to look like `2024-01-01T00:00:00.000+05:30`:
    an ISO-8601 date/time, optional fractional seconds and a trailing
    `+HH:MM` / `-HH:MM` UTC offset.  When no offset is present the value is
    taken as already being UTC.

    Args:
        start_ts (str): raw `startTs` string of a managed object.

    Returns:
        datetime: naive datetime expressed in UTC.

    Raises:
        ValueError: if the date/time part or the offset cannot be parsed.
    """
    # `%z` is deliberately not used: older Python versions do not accept
    # `+HH:MM` style offsets in strptime, so the offset is handled by hand.
    parsed = datetime.strptime(start_ts[:19], "%Y-%m-%dT%H:%M:%S")
    offset = start_ts[-6:]
    # 19 chars of date/time + 6 chars of offset is the shortest string that
    # can carry an offset; this also keeps the `-` of the date part out of it.
    if len(start_ts) >= 25 and offset[0] in ("+", "-") and offset[3] == ":":
        delta = timedelta(hours=int(offset[1:3]), minutes=int(offset[4:6]))
        # Local time = UTC + offset, therefore UTC = local time - offset.
        parsed = parsed - delta if offset[0] == "+" else parsed + delta
    return parsed


@check_wrapper(check_title="Stale dbgacEpgSummaryTask Objects")
def stale_epg_summary_task_check(tversion, **kwargs):
    """Report `dbgacEpgSummaryTask` objects stuck in `processing` for over 24 hours.

    Only target versions in the 6.1 train up to and including 6.1(5e), or in
    the 6.2 train up to and including 6.2(1g), are evaluated.

    Args:
        tversion: target version object; a falsy value means it is unknown.
        **kwargs: accepted for the common check signature, unused here.

    Returns:
        Result: MANUAL when the target version is missing, NA when the target
        version is outside the affected ranges, FAIL_O with one `[dn, startTs]`
        row per stale task, otherwise PASS.
    """
    headers = ["DN", "Start Time"]
    data = []
    recommended_action = "Delete the listed stale dbgacEpgSummaryTask objects to prevent policymgr crash."
    doc_url = "https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#stale-dbgacepgsummarytask-objects"

    if not tversion:
        return Result(result=MANUAL, msg=TVER_MISSING)

    in_6_1_range = (
        tversion.major1 == "6" and tversion.major2 == "1"
        and (tversion.older_than("6.1(5e)") or tversion.same_as("6.1(5e)"))
    )
    in_6_2_range = (
        tversion.major1 == "6" and tversion.major2 == "2"
        and (tversion.older_than("6.2(1g)") or tversion.same_as("6.2(1g)"))
    )
    if not (in_6_1_range or in_6_2_range):
        return Result(result=NA, msg=VER_NOT_AFFECTED)

    # Both sides of the comparison below are naive datetimes in UTC.
    threshold = datetime.utcnow() - timedelta(hours=24)
    for obj in icurl("class", 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'):
        attr = obj["dbgacEpgSummaryTask"]["attributes"]
        dn = attr.get("dn", "")
        start_ts = attr.get("startTs", "")
        try:
            # Normalise to UTC first; comparing the local wall-clock value
            # against a UTC threshold would skew the 24-hour window by the
            # UTC offset of the timestamp.
            task_dt = _epg_summary_task_start_utc(start_ts)
        except ValueError:
            # Best effort: the age of a task without a parsable start time
            # cannot be determined, so it is not reported.
            continue
        if task_dt < threshold:
            # Report the timestamp exactly as stored on the object.
            data.append([dn, start_ts])

    result = FAIL_O if data else PASS
    return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
6446+
64136447
# ---- Script Execution ----
64146448

64156449

@@ -6581,6 +6615,7 @@ class CheckManager:
65816615
rogue_ep_coop_exception_mac_check,
65826616
n9k_c9408_model_lem_count_check,
65836617
inband_management_policy_misconfig_check,
6618+
stale_epg_summary_task_check,
65846619
]
65856620
ssh_checks = [
65866621
# General

docs/docs/validations.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ Items | Defect | This Script
203203
[N9K-C9408 with more than 5 N9K-X9400-16W LEMs][d31] | CSCws82819 | :white_check_mark: | :no_entry_sign:
204204
[Multi-Pod Modular Spine Bootscript File][d32] | CSCwr66848 | :white_check_mark: | :no_entry_sign:
205205
[Inband Management Policy Misconfiguration][d33]| CSCwd40071 | :white_check_mark: | :no_entry_sign:
206+
[Stale dbgacEpgSummaryTask Objects][d34] | CSCwt69100 | :white_check_mark: | :no_entry_sign:
206207

207208
[d1]: #ep-announce-compatibility
208209
[d2]: #eventmgr-db-size-defect-susceptibility
@@ -237,6 +238,7 @@ Items | Defect | This Script
237238
[d31]: #n9k-c9408-with-more-than-5-n9k-x9400-16w-lems
238239
[d32]: #multi-pod-modular-spine-bootscript-file
239240
[d33]: #inband-management-policy-misconfiguration
241+
[d34]: #stale-dbgacepgsummarytask-objects
240242

241243
## General Check Details
242244

@@ -2792,13 +2794,22 @@ Due to excessive `svccoreCtrlr` or `svccoreNode` managed objects, Apic gui stuck
27922794

27932795
The `svccoreCtrlr` and `svccoreNode` objects represent core files generated by APIC processes and leaf/spine processes, respectively.
27942796

2795-
Due to [CSCws84232][67], the APIC GUI may become unresponsive after login, with dashboards stuck in a continuous Loading…state.
2797+
Due to [CSCws84232][69], the APIC GUI may become unresponsive after login, with dashboards stuck in a continuous "Loading…" state.
27962798
Administrators may be unable to access or operate the APIC GUI, potentially impacting day-to-day management or upgrade.
27972799

27982800
This check verifies the count of `svccoreCtrlr` managed objects and raises an alarm referencing the bug if the count is found to be more than 240. Remove the content or objects of `svccoreCtrlr` or `svccoreNode`. Contact Cisco TAC or upgrade to a release containing the fix for CSCws84232 before proceeding with an upgrade.
27992801

2802+
### Stale dbgacEpgSummaryTask Objects
2803+
2804+
Due to [CSCwt69100][70], a stale `dbgacEpgSummaryTask` object stuck in `processing` state with empty content can cause the policymgr process to crash on all APICs during an upgrade or process restart.
2805+
2806+
Affected target versions: 6.1 releases up to and including 6.1(5e), and 6.2 releases up to and including 6.2(1g).
2807+
2808+
The check queries for `dbgacEpgSummaryTask` objects with `operSt="processing"` and `startTs` older than 24 hours. Such objects are considered stale and unexpected. If found, delete them before proceeding with the upgrade to prevent policymgr from crashing on restart.
2809+
28002810

28012811
[0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script
2812+
[70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt69100
28022813
[1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html
28032814
[2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html
28042815
[3]: https://www.cisco.com/c/en/us/td/docs/dcn/aci/apic/5x/release-notes/cisco-aci-nx-os-release-notes-1501.html#_Toc140580685
@@ -2867,4 +2878,5 @@ This check will verify the count of the `svccoreCtrlr` Managed Object and raise
28672878
[66]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwr66848
28682879
[67]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwh80837
28692880
[68]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwd40071
2870-
[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232
2881+
[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232
2882+
[70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt69100
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[
2+
{
3+
"dbgacEpgSummaryTask": {
4+
"attributes": {
5+
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
6+
"operSt": "processing",
7+
"startTs": "2024-01-01T00:00:00.000+00:00"
8+
}
9+
}
10+
},
11+
{
12+
"dbgacEpgSummaryTask": {
13+
"attributes": {
14+
"dn": "action/policymgrsubj-[uni/tn-TN_TEST/epgToEpg-EPG_TEST_A_TO_EPG_TEST_B/dstepg-[uni/tn-TN_TEST/ap-AP_TEST/epg-EPG_TEST_B]]/dbgacEpgSummaryTask-ReportODACDef",
15+
"operSt": "processing",
16+
"startTs": "2026-01-15T11:30:00.000+00:00"
17+
}
18+
}
19+
}
20+
]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[
2+
{
3+
"dbgacEpgSummaryTask": {
4+
"attributes": {
5+
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
6+
"operSt": "processing",
7+
"startTs": "2026-01-15T11:30:00.000+00:00"
8+
}
9+
}
10+
}
11+
]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[
2+
{
3+
"dbgacEpgSummaryTask": {
4+
"attributes": {
5+
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
6+
"operSt": "processing",
7+
"startTs": "2024-01-01T00:00:00.000+00:00"
8+
}
9+
}
10+
}
11+
]
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import os
2+
import pytest
3+
import importlib
4+
from datetime import datetime
5+
from helpers.utils import read_data
6+
7+
script = importlib.import_module("aci-preupgrade-validation-script")
8+
9+
dir = os.path.dirname(os.path.abspath(__file__))
10+
11+
test_function = "stale_epg_summary_task_check"
12+
13+
# icurl query key
14+
task_api = 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'
15+
16+
# Fixed "now" used by mock_datetime fixture: 2026-01-15 12:00:00 UTC
17+
# Stale threshold = 2026-01-14 12:00:00 UTC (24h before fixed now)
18+
# dbgacEpgSummaryTask_stale.json -> startTs 2024-01-01 (way before threshold) -> FAIL_O
19+
# dbgacEpgSummaryTask_recent.json -> startTs 2026-01-15 11:30 UTC (30 min before fixed now) -> PASS
20+
FIXED_NOW = datetime(2026, 1, 15, 12, 0, 0)
21+
22+
23+
class MockDatetime:
    """Stand-in for the script's `datetime` name with a frozen `utcnow()`.

    Parsing and construction are forwarded to the real `datetime` class, so
    only the notion of "now" differs from the genuine implementation.
    """

    def __new__(cls, *args, **kwargs):
        # Calling the stand-in like the real class yields a real datetime.
        return datetime(*args, **kwargs)

    @staticmethod
    def strptime(date_string, format):
        # Parsing is not faked; hand it straight to the real class.
        parsed = datetime.strptime(date_string, format)
        return parsed

    @staticmethod
    def utcnow():
        # The only faked piece: always report the module's fixed timestamp.
        return FIXED_NOW
35+
36+
37+
@pytest.fixture
def mock_datetime(monkeypatch):
    """Swap `script.datetime` for `MockDatetime` for the duration of a test.

    The replacement is installed through `monkeypatch`, which restores the
    original attribute when the test finishes.
    """
    replacement = MockDatetime
    monkeypatch.setattr(script, "datetime", replacement)
41+
42+
43+
# DN / startTs row of the 2024 task contained in the "stale" and "mixed" fixtures.
_STALE_TASK_ROW = [
    "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
    "2024-01-01T00:00:00.000+00:00",
]


def _task_query_output(fixture_name):
    """Return the icurl mock mapping that answers the task query with a fixture file."""
    return {task_api: read_data(dir, fixture_name)}


@pytest.mark.parametrize(
    "tversion, icurl_outputs, expected_result, expected_data",
    [
        # Case 1: target 6.2(2a) is newer than both affected boundaries
        # (6.1(5e) and 6.2(1g)); no icurl output is provided. Expected: NA.
        ("6.2(2a)", {}, script.NA, []),
        # Case 2: affected target 6.1(5e), the query returns no objects.
        # Expected: PASS.
        ("6.1(5e)", _task_query_output("dbgacEpgSummaryTask_empty.json"), script.PASS, []),
        # Case 3: affected target 6.1(5e), one processing task started 30 minutes
        # before the fixed "now", i.e. inside the 24-hour window.
        # Expected: PASS.
        ("6.1(5e)", _task_query_output("dbgacEpgSummaryTask_recent.json"), script.PASS, []),
        # Case 4: affected target 6.1(5e), one processing task started in 2024,
        # far outside the 24-hour window.
        # Expected: FAIL_O listing its DN and startTs.
        ("6.1(5e)", _task_query_output("dbgacEpgSummaryTask_stale.json"), script.FAIL_O, [_STALE_TASK_ROW]),
        # Case 5: affected target 6.2(1g), one task from 2024 plus one recent task.
        # Expected: FAIL_O with only the 2024 task listed.
        ("6.2(1g)", _task_query_output("dbgacEpgSummaryTask_mixed.json"), script.FAIL_O, [_STALE_TASK_ROW]),
    ],
)
def test_logic(run_check, mock_icurl, mock_datetime, tversion, icurl_outputs, expected_result, expected_data):
    """Run the check for one parametrized scenario and compare result and data."""
    target_version = script.AciVersion(tversion)
    outcome = run_check(tversion=target_version)
    assert outcome.result == expected_result
    assert outcome.data == expected_data

0 commit comments

Comments
 (0)