Skip to content

Commit 5a620c0

Browse files
jeestr4dwelkinhemonrog2joeleblatakishida
authored
CSCwp95515 - Policydist crash-loop on upgrade if configpushShardCont headTx is non-zero while tailTx is zero #253 (#260)
* fix for issue NewValidation: CSCwp95515 - Policydist crash-loop on upgrade if configpushShardCont headTx is non-zero while tailTx is zero #253 * Welkin issue 245 (#263) * Introduced CSCwp22212 check for stale pconsRA objects --------- Co-authored-by: Gabriel <gmonroy@cisco.com> * Add ISIS DTEP Check (#247) * Add ISIS DTEP Check --------- Co-authored-by: Gabriel <gmonroy@cisco.com> * bump to v2.6.0 * PUV updates (#254) * feat: synth class + write back each function as json result * fix: Clean up get_vpc_nodes() and its pytest * feat: Split the main script execution code into each func and add pytest * feat: Retire json_log and adopt json per rule for both PUV and regular use + linting * feat: Support QA version and AciVersion instance as input in AciVersion (#259) * feat: Support QA version and AciVersion instance as input in AciVersion * fix: Use supported version format in older_than * fix: Use ValueError in AciVersion * feat: -c (cversion) / -d (debug_function) args into puv (#265) * feat: Update synthMaintP with the latest schema with ruleId * feat: Add decorator `@check_wrapper` for all check functions A new decorator `@check_wrapper` is to move most of the I/O functionalities, such as printing and writing the result into a file, outside of each check function so that each check can focus on the validation logic itself by minimizing the impact from a requirement change in the output format and so on. To support this, a new class `Result` is also introduced to make it clear what a check function is expected to return. As long as `Result` class is returned, the decorator `@check_wrapper` handles the printing them to stdout and files. 
* feat: rename synth class to AciResult + breakdown --puv into --api-only and --no-cleanup * feat: Add --version and --total-checks input args * feat: Do not touch log folders with some options like --version * fix: ValueError when col and row len do not match + pytest * fix: correct checks with mismatched col row length * fix: cimc logic update for QA --------- Co-authored-by: takishida <38262981+takishida@users.noreply.github.com> Co-authored-by: tkishida <tkishida@cisco.com> * bump to v3.0.0 v3.0.0 to mark for PUV integration + accounting for the 7 new input arg options * fix for issue NewValidation: CSCwp95515 - Policydist crash-loop on upgrade if configpushShardCont headTx is non-zero while tailTx is zero #253 * testchange * changes based on version3 * logic + pytest cleanup * changed the query and content of pos file to match with bug coditions. * fix query + cleanup docs * title consistency * fix doc link --------- Co-authored-by: Welkin <48639332+welkin-he@users.noreply.github.com> Co-authored-by: Gabriel <gmonroy@cisco.com> Co-authored-by: Joe LeBlanc <joelebla@cisco.com> Co-authored-by: GM <monrog2@gmail.com> Co-authored-by: takishida <38262981+takishida@users.noreply.github.com> Co-authored-by: tkishida <tkishida@cisco.com>
1 parent 2c3f18c commit 5a620c0

4 files changed

Lines changed: 128 additions & 1 deletion

File tree

aci-preupgrade-validation-script.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5302,6 +5302,35 @@ def apic_database_size_check(cversion, **kwargs):
53025302
result = FAIL_UF
53035303
return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
53045304

5305+
5306+
@check_wrapper(check_title='Policydist configpushShardCont crash')
def configpush_shard_check(tversion, **kwargs):
    """Look for configpushShardCont objects matching the CSCwp95515 condition.

    The check only applies when the target version is older than 6.1(4a).
    In that case the APIC is queried for `configpushShardCont` objects whose
    `tailTx` is "0" while `headTx` is not "0"; every returned object is
    reported with its dn, headTx and tailTx.

    Args:
        tversion: Target version object exposing `older_than()`; a falsy
            value means the target version was not provided.
        **kwargs: Accepted but not used by this check.

    Returns:
        Result with:
          * MANUAL  - no target version was given.
          * NA      - target version is 6.1(4a) or newer.
          * FAIL_O  - at least one matching object was returned.
          * PASS    - applicable target version and nothing matched.
    """
    headers = ["dn", "headTx", "tailTx"]
    recommended_action = 'Contact Cisco TAC for Support before upgrade'
    doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#policydist-configpushshardcont-crash'

    # Applicability depends on the target version, so bail out early without it.
    if not tversion:
        return Result(result=MANUAL, msg=TVER_MISSING)

    rows = []
    verdict = NA
    if tversion.older_than("6.1(4a)"):
        # The filter is applied server side, so only offending objects
        # (tailTx == "0" and headTx != "0") come back.
        class_query = (
            'configpushShardCont.json'
            '?query-target-filter=and(eq(configpushShardCont.tailTx,"0"),ne(configpushShardCont.headTx,"0"))'
        )
        for mo in icurl('class', class_query) or []:
            attrs = mo['configpushShardCont']['attributes']
            head_tx = attrs['headTx']
            tail_tx = attrs['tailTx']
            rows.append([attrs['dn'], head_tx, tail_tx])
        verdict = FAIL_O if rows else PASS

    return Result(
        result=verdict,
        headers=headers,
        data=rows,
        recommended_action=recommended_action,
        doc_url=doc_url,
    )
5333+
53055334
# ---- Script Execution ----
53065335

53075336

@@ -5461,6 +5490,7 @@ def get_checks(api_only, debug_function):
54615490
pbr_high_scale_check,
54625491
standby_sup_sync_check,
54635492
isis_database_byte_check,
5493+
configpush_shard_check,
54645494

54655495
]
54665496
conn_checks = [

docs/docs/validations.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ Items | Defect | This Script
188188
[Observer Database Size][d25] | CSCvw45531 | :white_check_mark: | :no_entry_sign:
189189
[Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign:
190190
[ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign:
191-
191+
[Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: | :no_entry_sign:
192192

193193
[d1]: #ep-announce-compatibility
194194
[d2]: #eventmgr-db-size-defect-susceptibility
@@ -217,6 +217,7 @@ Items | Defect | This Script
217217
[d25]: #observer-database-size
218218
[d26]: #stale-pconsra-object
219219
[d27]: #isis-dteps-byte-size
220+
[d28]: #policydist-configpushshardcont-crash
220221

221222

222223
## General Check Details
@@ -2579,6 +2580,15 @@ Do not upgrade to any affected ACI software release if this check fails.
25792580
Nexus Dashboard Insights (NDI) integration can cause ACI tech support generation to happen automatically as part of the bug scan feature.
25802581

25812582

2583+
### Policydist configpushShardCont crash
2584+
2585+
In ACI, there are internal objects which track the underlying transactions which occur as policies are handled by the Policydist process. One such object is `configpushShardCont` which populates the `headTx` and `tailTx` parameters to mark any potentially stuck transactions.
2586+
2587+
Due to [CSCwp95515][59], upgrading to an affected version while any `configpushShardCont` object has a non-zero `headTx` and a `tailTx` of `0` can cause the Policydist process to crash when config is pushed to the PM shard matching the `dn` of that `configpushShardCont` object. This script treats target versions older than 6.1(4a) as affected.
2588+
2589+
If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to identify and resolve the underlying issue before performing the upgrade.
2590+
2591+
25822592
[0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script
25832593
[1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html
25842594
[2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html
@@ -2638,3 +2648,4 @@ Do not upgrade to any affected ACI software release if this check fails.
26382648
[56]: https://www.cisco.com/c/en/us/td/docs/dcn/whitepapers/cisco-aci-virtual-edge-migration.html
26392649
[57]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp22212
26402650
[58]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp15375
2651+
[59]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp95515
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[
2+
{
3+
"configpushShardCont": {
4+
"attributes": {
5+
"childAction": "",
6+
"dn": "configpush/sh-2",
7+
"headTx": "576460752318322171",
8+
"lcOwn": "local",
9+
"modTs": "2025-06-28T21:32:58.754+00:00",
10+
"shard": "2",
11+
"status": "",
12+
"tailTx": "0"
13+
}
14+
}
15+
},
16+
{
17+
"configpushShardCont": {
18+
"attributes": {
19+
"childAction": "",
20+
"dn": "configpush/sh-3",
21+
"headTx": "516460756318314171",
22+
"lcOwn": "local",
23+
"modTs": "2025-06-23T18:12:06.867+00:00",
24+
"shard": "3",
25+
"status": "",
26+
"tailTx": "0"
27+
}
28+
}
29+
}
30+
]
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import os
2+
import pytest
3+
import logging
4+
import importlib
5+
from helpers.utils import read_data
6+
7+
script = importlib.import_module("aci-preupgrade-validation-script")

log = logging.getLogger(__name__)
# Directory of this test module; the JSON fixtures are read relative to it.
# Named `test_dir` (not `dir`) so the builtin `dir()` is not shadowed.
test_dir = os.path.dirname(os.path.abspath(__file__))

# icurl queries
# Must match the query string built inside configpush_shard_check exactly,
# because it is used as the key of the mocked icurl responses below.
configpushShardCont_api = 'configpushShardCont.json'
configpushShardCont_api += '?query-target-filter=and(eq(configpushShardCont.tailTx,"0"),ne(configpushShardCont.headTx,"0"))'


@pytest.mark.parametrize(
    "icurl_outputs, tversion, expected_result",
    [
        # tversion not given
        (
            {configpushShardCont_api: []},
            None,
            script.MANUAL,
        ),
        # Non-fixed Versions
        (
            # affected tversion, query returns objects with non-zero headTx and tailTx of 0
            {configpushShardCont_api: read_data(test_dir, "configpushShardCont_pos.json")},
            "6.0(3a)",
            script.FAIL_O,
        ),
        (
            # affected tversion, query returns no matching configpushShardCont objects
            {configpushShardCont_api: []},
            "5.2(6a)",
            script.PASS,
        ),
        # Fixed Versions
        (
            # non-affected tversion, matching objects exist but the check does not apply
            {configpushShardCont_api: read_data(test_dir, "configpushShardCont_pos.json")},
            "6.1(6b)",
            script.NA,
        ),
        (
            # non-affected tversion, query returns no matching configpushShardCont objects
            {configpushShardCont_api: []},
            "6.1(4b)",
            script.NA,
        ),
    ],
)
def test_logic(mock_icurl, tversion, expected_result):
    """Check the result of configpush_shard_check for each parametrized case.

    `mock_icurl` is a fixture defined outside this module; presumably it
    serves the `icurl_outputs` mapping in place of real icurl calls
    (confirm against conftest).
    """
    tversion = script.AciVersion(tversion) if tversion else None
    result = script.configpush_shard_check(1, 1, tversion)
    assert result == expected_result

0 commit comments

Comments
 (0)