Skip to content

Commit 2789582

Browse files
committed
Updated as branch v4.2.0-dev branch
2 parents f903294 + 0d28b80 commit 2789582

24 files changed

Lines changed: 830 additions & 146 deletions

aci-preupgrade-validation-script.py

Lines changed: 118 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
import os
3939
import re
4040

41-
SCRIPT_VERSION = "v4.1.0-dev"
41+
SCRIPT_VERSION = "v4.2.0-dev"
4242
DEFAULT_TIMEOUT = 600 # sec
4343
# result constants
4444
DONE = 'DONE'
@@ -3010,17 +3010,19 @@ def scalability_faults_check(**kwargs):
30103010

30113011

30123012
@check_wrapper(check_title="APIC Disk Space Usage (F1527, F1528, F1529 equipment-full)")
3013-
def apic_disk_space_faults_check(cversion, **kwargs):
3013+
def apic_disk_space_faults_check(cversion, tversion, **kwargs):
30143014
result = FAIL_UF
30153015
headers = ['Fault', 'Pod', 'Node', 'Mount Point', 'Current Usage %', 'Recommended Action']
30163016
data = []
30173017
unformatted_headers = ['Fault', 'Fault DN', 'Recommended Action']
30183018
unformatted_data = []
30193019
doc_url = "https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#apic-disk-space-usage"
3020+
# We check /tmp utilization because high usage can lead to snapshot corruption during an upgrade. In the fixed versions, the snapshot storage location moved to /data.
30203021
recommended_action = {
30213022
'/firmware': 'Remove unneeded images',
30223023
'/techsupport': 'Remove unneeded techsupports/cores',
3023-
'/data/log': 'Remove unneeded logs in var/log/dme/log'
3024+
'/data/log': 'Remove unneeded logs in var/log/dme/log',
3025+
'/tmp': 'Remove unneeded logs in /tmp directory'
30243026
}
30253027
default_action = 'Contact Cisco TAC.'
30263028
if cversion.same_as('4.0(1h)') or cversion.older_than('3.2(6i)'):
@@ -3029,6 +3031,8 @@ def apic_disk_space_faults_check(cversion, **kwargs):
30293031
dn_regex = node_regex + r'/.+p-\[(?P<mountpoint>.+)\]-f'
30303032
desc_regex = r'is (?P<usage>\d{2,3}%) full'
30313033

3034+
tmp_faults_skip_versions = ["6.0(9f)", "6.1(4h)", "6.2(1g)"]
3035+
tmp_faults_skipped = False # Track if we skip /tmp faults for CSCwo96334 versions
30323036
faultInsts = icurl('class',
30333037
'faultInst.json?query-target-filter=or(eq(faultInst.code,"F1527"),eq(faultInst.code,"F1528"),eq(faultInst.code,"F1529"))')
30343038
for faultInst in faultInsts:
@@ -3038,14 +3042,24 @@ def apic_disk_space_faults_check(cversion, **kwargs):
30383042
fc = faultInst['faultInst']['attributes']['code']
30393043
dn = re.search(dn_regex, faultInst['faultInst']['attributes']['dn'])
30403044
desc = re.search(desc_regex, faultInst['faultInst']['attributes']['descr'])
3041-
if dn and desc:
3042-
data.append([fc, dn.group('pod'), dn.group('node'), dn.group('mountpoint'),
3045+
if dn:
3046+
mountpoint = dn.group('mountpoint')
3047+
# CSCwo96334: Skip /tmp faults when target is >= 6.1(4h) or any unaffected versions
3048+
if mountpoint == '/tmp' and (not tversion.older_than("6.1(4h)") or any(tversion.same_as(version) for version in tmp_faults_skip_versions)):
3049+
tmp_faults_skipped = True
3050+
continue
3051+
if desc:
3052+
data.append([fc, dn.group('pod'), dn.group('node'), dn.group('mountpoint'),
30433053
desc.group('usage'),
30443054
recommended_action.get(dn.group('mountpoint'), default_action)])
3045-
else:
3046-
unformatted_data.append([fc, faultInst['faultInst']['attributes']['dn'], default_action])
3055+
else:
3056+
unformatted_data.append([fc, faultInst['faultInst']['attributes']['dn'], default_action])
30473057
if not data and not unformatted_data:
3048-
result = PASS
3058+
# If we only found /tmp faults that were skipped (CSCwo96334 fixed target versions), return NA
3059+
if tmp_faults_skipped:
3060+
result = NA
3061+
else:
3062+
result = PASS
30493063
return Result(
30503064
result=result,
30513065
headers=headers,
@@ -3866,27 +3880,83 @@ def target_version_compatibility_check(cversion, tversion, **kwargs):
38663880
return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
38673881

38683882

3869-
@check_wrapper(check_title="Gen 1 switch compatibility")
3870-
def gen1_switch_compatibility_check(tversion, fabric_nodes, **kwargs):
3883+
@check_wrapper(check_title="Supported hardware compatibility")
3884+
def supported_hardware_check(tversion, fabric_nodes, **kwargs):
38713885
result = FAIL_UF
3872-
headers = ["Target Version", "Node ID", "Model", "Warning"]
3886+
headers = ["Target Version", "Node ID", "Model", "Type", "Warning"]
3887+
data = []
3888+
unformatted_headers = ["Target Version", "DN", "Model", "Type", "Warning"]
3889+
unformatted_data = []
38733890
gen1_models = ["N9K-C9336PQ", "N9K-X9736PQ", "N9K-C9504-FM", "N9K-C9508-FM", "N9K-C9516-FM", "N9K-C9372PX-E",
38743891
"N9K-C9372TX-E", "N9K-C9332PQ", "N9K-C9372PX", "N9K-C9372TX", "N9K-C9396PX", "N9K-C9396TX",
38753892
"N9K-C93128TX"]
3876-
data = []
3893+
unsupported_6_0_1_switch_models = ["N9K-C93120TX"]
3894+
unsupported_6_1_1_switch_models = ["N9K-C93180LC-EX"]
3895+
unsupported_5_0_1_exp_module_models = ["N9K-M12PQ", "N9K-M6PQ", "N9K-M6PQ-E"]
3896+
unsupported_6_1_1_fex_models = ["N2K-C2332TQ-10GT", "N2K-C2348TQ-10GE", "N2K-C2232PP-10GE", "N2K-C2232TM-E-10GE", "N2K-C2348TQ-10G-E"]
3897+
unsupported_6_1_1_sup_models = ["N9K-SUP-A", "N9K-SUP-B"]
38773898
recommended_action = 'Select supported target version or upgrade hardware'
3878-
doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#compatibility-switch-hardware-gen1'
3899+
doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#supported-hardware-compatibility'
38793900

3880-
if not tversion:
3881-
return Result(result=MANUAL, msg=TVER_MISSING)
3882-
if tversion.newer_than("5.0(1a)"):
3901+
if not tversion.older_than("5.0(1a)"):
38833902
for node in fabric_nodes:
3884-
if node['fabricNode']['attributes']['model'] in gen1_models:
3885-
data.append([str(tversion), node['fabricNode']['attributes']['id'],
3886-
node['fabricNode']['attributes']['model'], 'Not supported on 5.x+'])
3887-
if not data:
3903+
model = node['fabricNode']['attributes']['model']
3904+
if model in gen1_models:
3905+
data.append([str(tversion), node['fabricNode']['attributes']['id'], model, 'Switch', 'Not supported on 5.x+'])
3906+
3907+
eqptLCs = icurl('class', 'eqptLC.json')
3908+
for eqptLC in eqptLCs:
3909+
model = eqptLC['eqptLC']['attributes']['model']
3910+
if model in unsupported_5_0_1_exp_module_models:
3911+
dn = re.search(node_regex, eqptLC['eqptLC']['attributes']['dn'])
3912+
if dn:
3913+
data.append([str(tversion), dn.group('node'), model, 'Expansion Module', 'Not supported on 5.x+'])
3914+
else:
3915+
unformatted_data.append([str(tversion), eqptLC['eqptLC']['attributes']['dn'], model, 'Expansion Module', 'Not supported on 5.x+'])
3916+
3917+
if not tversion.older_than("6.0(1a)"):
3918+
for node in fabric_nodes:
3919+
model = node['fabricNode']['attributes']['model']
3920+
if model in unsupported_6_0_1_switch_models:
3921+
data.append([str(tversion), node['fabricNode']['attributes']['id'], model, 'Switch', 'Deprecated from 6.0(1)+'])
3922+
3923+
if not tversion.older_than("6.1(1f)"):
3924+
for node in fabric_nodes:
3925+
model = node['fabricNode']['attributes']['model']
3926+
if model in unsupported_6_1_1_switch_models:
3927+
data.append([str(tversion), node['fabricNode']['attributes']['id'], model, 'Switch', 'Deprecated from 6.1(1)+'])
3928+
3929+
eqptExtChs = icurl('class', 'eqptExtCh.json')
3930+
for eqptExtCh in eqptExtChs:
3931+
model = eqptExtCh['eqptExtCh']['attributes']['model']
3932+
if model in unsupported_6_1_1_fex_models:
3933+
dn = re.search(node_regex, eqptExtCh['eqptExtCh']['attributes']['dn'])
3934+
if dn:
3935+
data.append([str(tversion), dn.group('node'), model, 'FEX', 'Deprecated from 6.1(1)+'])
3936+
else:
3937+
unformatted_data.append([str(tversion), eqptExtCh['eqptExtCh']['attributes']['dn'], model, 'FEX', 'Deprecated from 6.1(1)+'])
3938+
3939+
eqptSupCs = icurl('class', 'eqptSupC.json')
3940+
for eqptSupC in eqptSupCs:
3941+
model = eqptSupC['eqptSupC']['attributes']['model']
3942+
if model in unsupported_6_1_1_sup_models:
3943+
dn = re.search(node_regex, eqptSupC['eqptSupC']['attributes']['dn'])
3944+
if dn:
3945+
data.append([str(tversion), dn.group('node'), model, 'Supervisor', 'Deprecated from 6.1(1)+'])
3946+
else:
3947+
unformatted_data.append([str(tversion), eqptSupC['eqptSupC']['attributes']['dn'], model, 'Supervisor', 'Deprecated from 6.1(1)+'])
3948+
3949+
if not data and not unformatted_data:
38883950
result = PASS
3889-
return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
3951+
return Result(
3952+
result=result,
3953+
headers=headers,
3954+
data=data,
3955+
unformatted_headers=unformatted_headers,
3956+
unformatted_data=unformatted_data,
3957+
recommended_action=recommended_action,
3958+
doc_url=doc_url,
3959+
)
38903960

38913961

38923962
@check_wrapper(check_title="Contract Port 22 Defect")
@@ -6292,7 +6362,7 @@ def multipod_modular_spine_bootscript_check(tversion, fabric_nodes, username, pa
62926362

62936363
return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
62946364

6295-
6365+
62966366
@check_wrapper(check_title="Inband Management Policy Misconfiguration")
62976367
def inband_management_policy_misconfig_check(cversion, tversion, **kwargs):
62986368
result = PASS
@@ -6316,6 +6386,29 @@ def inband_management_policy_misconfig_check(cversion, tversion, **kwargs):
63166386
result = FAIL_O
63176387
return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
63186388

6389+
6390+
@check_wrapper(check_title="svccore excessive data check")
def svccore_excessive_data_check(**kwargs):
    """Report svccore classes whose object count is above the threshold.

    The object counts of `svccoreCtrlr` and `svccoreNode` are queried, and
    each class with a count greater than 240 is added to the result table
    together with the count as returned by the query.

    Returns:
        A Result that is MANUAL when at least one class is reported and PASS
        otherwise. If anything raises while the counts are fetched or
        evaluated, an ERROR Result carrying the exception text is returned.
    """
    doc_url = "https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#svccore-excessive-data-check"
    recommended_action = "Delete the core files before proceeding with upgrade. Please refer to the document linked below and contact Cisco TAC for assistance if needed."
    headers = ['Class Name', 'Count']
    max_object_count = 240
    count_queries = (
        ('svccoreCtrlr', 'svccoreCtrlr.json?query-target=self&rsp-subtree-include=count'),
        ('svccoreNode', 'svccoreNode.json?query-target=self&rsp-subtree-include=count'),
    )
    try:
        # Issue every query first; the responses are only inspected afterwards.
        responses = [(class_name, icurl('class', query)) for class_name, query in count_queries]

        data = []
        for class_name, response in responses:
            object_count = response[0]['moCount']['attributes']['count']
            if int(object_count) > max_object_count:
                data.append([class_name, object_count])

        result = MANUAL if data else PASS
        return Result(
            result=result,
            headers=headers,
            data=data,
            recommended_action=recommended_action,
            doc_url=doc_url,
        )
    except Exception as e:
        return Result(result=ERROR, msg="Error occurred while fetching svccore object counts: {}".format(str(e)), doc_url=doc_url)
6411+
63196412

63206413
@check_wrapper(check_title='False Micron SSD failure_fault')
63216414
def false_micron_ssd_failure_fault_check(cversion, tversion, **kwargs):
@@ -6428,7 +6521,7 @@ class CheckManager:
64286521
api_checks = [
64296522
# General Checks
64306523
target_version_compatibility_check,
6431-
gen1_switch_compatibility_check,
6524+
supported_hardware_check,
64326525
r_leaf_compatibility_check,
64336526
cimc_compatibilty_check,
64346527
apic_cluster_health_check,
@@ -6442,6 +6535,7 @@ class CheckManager:
64426535
validate_32_64_bit_image_check,
64436536
fabric_link_redundancy_check,
64446537
apic_downgrade_compat_warning_check,
6538+
svccore_excessive_data_check,
64456539

64466540
# Faults
64476541
apic_disk_space_faults_check,
@@ -6693,4 +6787,4 @@ def main(_args=None):
66936787
msg = "Abort due to unexpected error - {}".format(e)
66946788
prints(msg)
66956789
log.error(msg, exc_info=True)
6696-
sys.exit(1)
6790+
sys.exit(1)

docs/docs/validations.md

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ Items | This Script
3737
[Fabric Link Redundancy][g17] | :white_check_mark: | :no_entry_sign:
3838
[APIC Database Size][g18] | :white_check_mark: | :no_entry_sign:
3939
[APIC downgrade compatibility when crossing 6.2 release][g19]| :white_check_mark: | :no_entry_sign:
40+
[Supported Hardware Compatibility][g20] | :white_check_mark: | :no_entry_sign:
41+
[Svccore Excessive Data Check][g21] | :white_check_mark: | :no_entry_sign:
4042

4143
[g1]: #compatibility-target-aci-version
4244
[g2]: #compatibility-cimc-version
@@ -57,6 +59,8 @@ Items | This Script
5759
[g17]: #fabric-link-redundancy
5860
[g18]: #apic-database-size
5961
[g19]: #apic-downgrade-compatibility-when-crossing-62-release
62+
[g20]: #supported-hardware-compatibility
63+
[g21]: #svccore-excessive-data-check
6064

6165
### Fault Checks
6266
Items | Faults | This Script | APIC built-in
@@ -267,6 +271,17 @@ The script checks the presence of generation one switches when the upgrade is cr
267271
Or you can check the [Release Note 15.0(1) of ACI switches][3] to see the list of generation one switches, typically the one without any suffix such as N9K-C9372PX, that are no longer supported from 15.0(1) release.
268272

269273

274+
### Supported Hardware Compatibility
275+
276+
The script checks the presence of deprecated hardware in the fabric.
277+
278+
The list of supported and unsupported hardware is populated from the Release Notes across all ACI releases. This means the check covers hardware compatibility changes introduced in any version, not just the most recent release. As new release notes are published and hardware is deprecated, this list is updated accordingly.
279+
280+
Refer to the [Release Note 15.0(1) of ACI switches][3] to see the list of unsupported hardware for your desired target version. Prior to upgrading to the target version, replace the unsupported hardware elements in your fabric with supported hardware.
281+
282+
Contact Cisco TAC for further assistance.
283+
284+
270285
### Compatibility (Remote Leaf Switch)
271286

272287
The script checks the requirement to use remote leaf switches on the target version.
@@ -2772,14 +2787,27 @@ Suggestion:
27722787
Contact Cisco TAC to remove any identified misconfigured objects before performing the upgrade to prevent policyelem crashes.
27732788
The [CSCwd40071][68] defect affects versions 5.2(5c) and later with a fix available in 6.0(1g). However, the issue will only be triggered during Apic upgrades crossing 6.0(4c) due to [CSCwh80837][67].
27742789

2790+
2791+
### Svccore Excessive Data Check
2792+
2793+
When there are excessive `svccoreCtrlr` or `svccoreNode` managed objects, the APIC GUI can get stuck loading multiple queries.
2794+
2795+
The `svccoreCtrlr` and `svccoreNode` objects represent core files related to APIC processes and leaf/spine switch processes, respectively.
2796+
2797+
Due to [CSCws84232][69], the APIC GUI may become unresponsive after login, with dashboards stuck in a continuous “Loading…” state.
2798+
Administrators may be unable to access or operate the APIC GUI, potentially impacting day-to-day management or upgrade.
2799+
2800+
This check verifies the count of the `svccoreCtrlr` and `svccoreNode` managed objects and raises an alert referencing this bug if either count is greater than 240. Remove the content or objects of `svccoreCtrlr` or `svccoreNode`. Contact Cisco TAC or upgrade to a release containing the fix for CSCws84232 before proceeding with an upgrade.
2801+
27752802
### False Micron SSD failure_fault
27762803

2777-
Due to [CSCwt38698][69], certain Micron SSDs present in the fabric may experience premature end-of-life failures after upgrading to `6.1(5e)` or `6.2(1g)`.
2804+
Due to [CSCwt38698][70], certain Micron SSDs present in the fabric may experience premature end-of-life failures after upgrading to `6.1(5e)` or `6.2(1g)`.
27782805

27792806
To avoid this issue, change the target version to another version. Or run the **SSD Lifetime Validation** script on all nodes with identified Micron SSDs prior to upgrading to determine the remaining SSD lifetime. If the SSD lifetime is critically low, you have to follow the SSD replacement procedure outlined in the field notice to ensure that the node remains available after the upgrade. In case you already upgraded your node and are experiencing unavailability due to this issue, contact Cisco TAC for the SSD replacement procedure to restore the node.
27802807

27812808
- Script location: [SSD Lifetime Validation](https://github.com/datacenter/aci-tac-scripts/tree/main/SSD%20Lifetime%20Validation)
27822809

2810+
27832811
[0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script
27842812
[1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html
27852813
[2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html
@@ -2849,4 +2877,5 @@ To avoid this issue, change the target version to another version. Or run the **
28492877
[66]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwr66848
28502878
[67]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwh80837
28512879
[68]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwd40071
2852-
[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt38698
2880+
[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232
2881+
[70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt38698

0 commit comments

Comments
 (0)