Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 69 additions & 7 deletions config/chassis_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import re
import subprocess
import utilities_common.cli as clicommon
from utilities_common.chassis import is_smartswitch, get_all_dpus
from utilities_common.chassis import is_smartswitch, is_bmc, get_all_dpus
from utilities_common.module import ModuleHelper
from datetime import datetime, timedelta, timezone
from datetime import timedelta

TIMEOUT_SECS = 10
TRANSITION_TIMEOUT = timedelta(seconds=240) # 4 minutes
Expand Down Expand Up @@ -60,6 +60,9 @@ def get_config_module_state(db, chassis_module_name):
if not fvs:
if is_smartswitch():
return 'down'
elif is_bmc() and chassis_module_name.startswith("SWITCH-HOST"):
# On BMC, SWITCH-HOST default is 'down' to keep it powered off on boot
return 'down'
else:
return 'up'
else:
Expand Down Expand Up @@ -141,12 +144,16 @@ def fabric_module_set_admin_status(db, chassis_module_name, state):
type=click.Choice(get_all_dpus(), case_sensitive=False) if is_smartswitch() else str
)
def shutdown_chassis_module(db, chassis_module_name):
"""Chassis-module shutdown of module"""
"""Shutdown chassis module (sets admin_status to down; default for SWITCH-HOST on BMC)"""
config_db = db.cfgdb
ctx = click.get_current_context()

if not chassis_module_name.startswith(("SUPERVISOR", "LINE-CARD", "FABRIC-CARD", "DPU")):
ctx.fail("'module_name' has to begin with 'SUPERVISOR', 'LINE-CARD', 'FABRIC-CARD', or 'DPU'")
allowed_prefixes = ("SUPERVISOR", "LINE-CARD", "FABRIC-CARD", "DPU")
if is_bmc():
allowed_prefixes += ("SWITCH-HOST",)
if not chassis_module_name.startswith(allowed_prefixes):
allowed_prefixes_str = "', '".join(allowed_prefixes)
ctx.fail(f"'module_name' has to begin with '{allowed_prefixes_str}'")

if get_config_module_state(db, chassis_module_name) == 'down':
click.echo(f"Module {chassis_module_name} is already in down state")
Expand All @@ -163,6 +170,10 @@ def shutdown_chassis_module(db, chassis_module_name):
'admin_status': 'down',
}
config_db.set_entry('CHASSIS_MODULE', chassis_module_name, fvs)
elif is_bmc() and chassis_module_name.startswith("SWITCH-HOST"):
click.echo(f"Shutting down chassis module {chassis_module_name}")
# Use mod_entry to preserve power_on_delay and graceful_shutdown_timeout in the same entry
config_db.mod_entry('CHASSIS_MODULE', chassis_module_name, {'admin_status': 'down'})
else:
click.echo(f"Shutting down chassis module {chassis_module_name}")
config_db.set_entry('CHASSIS_MODULE', chassis_module_name, {'admin_status': 'down'})
Expand All @@ -186,8 +197,12 @@ def startup_chassis_module(db, chassis_module_name):
config_db = db.cfgdb
ctx = click.get_current_context()

if not chassis_module_name.startswith(("SUPERVISOR", "LINE-CARD", "FABRIC-CARD", "DPU")):
ctx.fail("'module_name' has to begin with 'SUPERVISOR', 'LINE-CARD', 'FABRIC-CARD', or 'DPU'")
allowed_prefixes = ("SUPERVISOR", "LINE-CARD", "FABRIC-CARD", "DPU")
if is_bmc():
allowed_prefixes += ("SWITCH-HOST",)
if not chassis_module_name.startswith(allowed_prefixes):
allowed_prefixes_str = "', '".join(allowed_prefixes)
ctx.fail(f"'module_name' has to begin with '{allowed_prefixes_str}'")
return

if get_config_module_state(db, chassis_module_name) == 'up':
Expand All @@ -205,10 +220,57 @@ def startup_chassis_module(db, chassis_module_name):
'admin_status': 'up',
}
config_db.set_entry('CHASSIS_MODULE', chassis_module_name, fvs)
elif is_bmc() and chassis_module_name.startswith("SWITCH-HOST"):
click.echo(f"Starting up chassis module {chassis_module_name}")
# Use mod_entry to preserve power_on_delay and graceful_shutdown_timeout in the same entry
config_db.mod_entry('CHASSIS_MODULE', chassis_module_name, {'admin_status': 'up'})
else:
click.echo(f"Starting up chassis module {chassis_module_name}")
config_db.set_entry('CHASSIS_MODULE', chassis_module_name, None)

if chassis_module_name.startswith("FABRIC-CARD"):
if not check_config_module_state_with_timeout(ctx, db, chassis_module_name, 'up'):
fabric_module_set_admin_status(db, chassis_module_name, 'up')


if is_bmc():

    def _set_switch_host_field(db, chassis_module_name, command_name, field, seconds):
        """Validate the target module and store one SWITCH-HOST setting.

        Args:
            db: Db object injected by clicommon.pass_db; its cfgdb is written.
            chassis_module_name: module name; must begin with "SWITCH-HOST".
            command_name: CLI subcommand name, used in the usage error text.
            field: CHASSIS_MODULE field to write.
            seconds: integer value supplied by click; stored as a string.

        Raises:
            click.UsageError: raised through ctx.fail() when the module name
                does not begin with "SWITCH-HOST".
        """
        ctx = click.get_current_context()

        if not chassis_module_name.startswith("SWITCH-HOST"):
            ctx.fail(f"'{command_name}' is only applicable to SWITCH-HOST modules")

        # mod_entry updates only the given field, so admin_status and the other
        # timer stay as they are without a read-modify-write of the whole entry
        # (the same approach the shutdown/startup commands take for SWITCH-HOST).
        db.cfgdb.mod_entry('CHASSIS_MODULE', chassis_module_name, {field: str(seconds)})

    #
    # 'power-on-delay' subcommand ('config chassis modules power-on-delay ...')
    #
    @modules.command('power-on-delay')
    @clicommon.pass_db
    @click.argument('chassis_module_name', metavar='<module_name>', required=True)
    @click.argument('seconds', metavar='<seconds>', required=True, type=click.IntRange(min=0))
    def set_power_on_delay(db, chassis_module_name, seconds):
        """Configure delay (secs) BMC waits before powering on Switch-Host (default: 0)"""
        _set_switch_host_field(db, chassis_module_name, 'power-on-delay', 'power_on_delay', seconds)
        click.echo(f"Power-on-delay for {chassis_module_name} set to {seconds} seconds")

    #
    # 'shutdown-timeout' subcommand ('config chassis modules shutdown-timeout ...')
    #
    @modules.command('shutdown-timeout')
    @clicommon.pass_db
    @click.argument('chassis_module_name', metavar='<module_name>', required=True)
    @click.argument('seconds', metavar='<seconds>', required=True, type=click.IntRange(min=0))
    def set_graceful_shutdown_timeout(db, chassis_module_name, seconds):
        """Configure graceful-shutdown timeout (secs) before BMC forces power-off (0: immediate, default: 120)"""
        _set_switch_host_field(
            db, chassis_module_name, 'shutdown-timeout', 'graceful_shutdown_timeout', seconds)
        click.echo(f"Shutdown-timeout for {chassis_module_name} set to {seconds} seconds")
50 changes: 50 additions & 0 deletions config/liquid_cool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import click
import utilities_common.cli as clicommon

# Table and row key of the leak-control policy entry written through db.cfgdb.
LEAK_CONTROL_POLICY_TABLE = 'LEAK_CONTROL_POLICY'
LEAK_CONTROL_POLICY_KEY = 'policy'

# Accepted CLI argument values.
VALID_POLICIES = ('system', 'rack_mgr')
VALID_SEVERITIES = ('critical', 'minor')
VALID_ACTIONS = ('syslog_only', 'graceful_shutdown', 'power_off')

# policy type -> field holding its enabled/disabled state:
#   system_leak_policy, rack_mgr_leak_policy
POLICY_FIELD_MAP = {policy: f'{policy}_leak_policy' for policy in VALID_POLICIES}

# Field names call system events "leak" and rack-manager events "alert".
_EVENT_NOUN = {'system': 'leak', 'rack_mgr': 'alert'}

# (policy type, severity) -> field holding the configured action:
#   system_critical_leak_action, system_minor_leak_action,
#   rack_mgr_critical_alert_action, rack_mgr_minor_alert_action
ACTION_FIELD_MAP = {
    (policy, severity): f'{policy}_{severity}_{_EVENT_NOUN[policy]}_action'
    for policy in VALID_POLICIES
    for severity in VALID_SEVERITIES
}


# Parent group for the 'liquid-cool' subcommands; the docstring is the help text.
@click.group('liquid-cool')
def liquid_cool():
    """Liquid cooling configuration commands"""


@liquid_cool.command('leak-control')
@clicommon.pass_db
@click.argument('policy_type', metavar='[system|rack_mgr]', type=click.Choice(VALID_POLICIES))
@click.argument('state', metavar='[enabled|disabled]', type=click.Choice(['enabled', 'disabled']))
def leak_control(db, policy_type, state):
    """Enable or disable system/rack-manager leak policy enforcement"""
    # click.Choice has already restricted policy_type to the keys of
    # POLICY_FIELD_MAP, so the lookup cannot miss.
    update = {POLICY_FIELD_MAP[policy_type]: state}
    db.cfgdb.mod_entry(LEAK_CONTROL_POLICY_TABLE, LEAK_CONTROL_POLICY_KEY, update)
    click.echo("Leak control policy for '{}' set to '{}'".format(policy_type, state))


@liquid_cool.command('leak-action')
@clicommon.pass_db
@click.argument('policy_type', metavar='[system|rack_mgr]', type=click.Choice(VALID_POLICIES))
@click.argument('severity', metavar='[critical|minor]', type=click.Choice(VALID_SEVERITIES))
@click.argument('action', metavar='[syslog_only|graceful_shutdown|power_off]', type=click.Choice(VALID_ACTIONS))
def leak_action(db, policy_type, severity, action):
    """Configure the action taken when a critical/minor leak event is detected"""
    # Both arguments are validated by click.Choice, so every (policy, severity)
    # pair that reaches this point has an entry in ACTION_FIELD_MAP.
    update = {ACTION_FIELD_MAP[policy_type, severity]: action}
    db.cfgdb.mod_entry(LEAK_CONTROL_POLICY_TABLE, LEAK_CONTROL_POLICY_KEY, update)
    click.echo(
        "Leak action for '{}' '{}' events set to '{}'".format(policy_type, severity, action)
    )
2 changes: 2 additions & 0 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from . import aaa
from . import bmc
from . import chassis_modules
from . import liquid_cool
from . import console
from . import feature
from . import fabric
Expand Down Expand Up @@ -1756,6 +1757,7 @@ def config(ctx):
config.add_command(aaa.radius)
config.add_command(bmc.bmc)
config.add_command(chassis_modules.chassis)
config.add_command(liquid_cool.liquid_cool)
config.add_command(console.console)
config.add_command(fabric.fabric)
config.add_command(feature.feature)
Expand Down
22 changes: 21 additions & 1 deletion show/chassis_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from natsort import natsorted
from tabulate import tabulate
from swsscommon.swsscommon import SonicV2Connector
from utilities_common.chassis import is_smartswitch
from utilities_common.chassis import is_smartswitch, is_bmc
from utilities_common.module import ModuleHelper, NOT_AVAILABLE
from sonic_platform_base.module_base import ModuleBase

import utilities_common.cli as clicommon
Expand Down Expand Up @@ -51,6 +52,19 @@ def status(db, chassis_module_name):
print('Key {} not found in {} table'.format(key_pattern, CHASSIS_MODULE_INFO_TABLE))
return

# On BMC, oper_status is read directly from the platform API.
# ModuleHelper.__init__ does not raise on chassis load failure; it logs and keeps
# platform_chassis=None. Treat that as unavailable so we don't emit per-module
# errors in the loop — just fall back to STATE_DB silently.
module_helper = None
Comment thread
judyjoseph marked this conversation as resolved.
if is_bmc():
try:
helper = ModuleHelper()
if helper.platform_chassis:
module_helper = helper
except Exception:
pass

table = []
for key in natsorted(keys):
key_list = key.split('|')
Expand All @@ -66,6 +80,12 @@ def status(db, chassis_module_name):
oper_status = data_dict.get(CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, ModuleBase.MODULE_STATUS_EMPTY)
serial = data_dict.get(CHASSIS_MODULE_INFO_SERIAL_FIELD, 'N/A')

# On BMC, prefer oper_status from platform API; fall back to STATE_DB if unavailable
if module_helper is not None:
platform_oper_status = module_helper.get_module_oper_status(key_list[1])
if platform_oper_status != NOT_AVAILABLE:
oper_status = platform_oper_status

# Determine admin_status
if is_smartswitch():
admin_status = 'down'
Expand Down
118 changes: 110 additions & 8 deletions show/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys

import click
from tabulate import tabulate
import utilities_common.cli as clicommon
from sonic_py_common import device_info

Expand Down Expand Up @@ -39,6 +40,7 @@ def try_get(platform, attr, fallback):

return chassis_info


#
# 'platform' group ("show platform ...")
#
Expand Down Expand Up @@ -306,15 +308,115 @@ def firmware(args):
sys.exit(e.returncode)


# 'leakage' subcommand ("show platform leakage status")
# Table and row key of the leak control policy; read through db.cfgdb by
# 'show platform leak control-policy'.
LEAK_CONTROL_POLICY_TABLE = 'LEAK_CONTROL_POLICY'
LEAK_CONTROL_POLICY_KEY = 'policy'
# STATE_DB table listing rack-manager alerts (keys are '<table>|<alert name>').
RACK_MANAGER_ALERT_TABLE = 'RACK_MANAGER_ALERT'
# Table of leak sensor profiles, keyed by sensor type; read through db.cfgdb.
LEAK_PROFILE_TABLE = 'LEAK_PROFILE'
# STATE_DB table holding the leak sensor status rows.
LIQUID_COOLING_INFO_TABLE = 'LIQUID_COOLING_INFO'


def _get_state_db():
    """Return a SonicV2Connector on 127.0.0.1 that is connected to STATE_DB."""
    # Imported inside the function, so swsscommon is only loaded when a
    # command actually needs STATE_DB.
    from swsscommon.swsscommon import SonicV2Connector

    connector = SonicV2Connector(host="127.0.0.1")
    connector.connect(connector.STATE_DB)
    return connector


# 'leak' group ("show platform leak ...")
@platform.group()
def leakage():
"""Show platform leakage information"""
def leak():
"""Show liquid cooling leak information"""
pass


@leakage.command()
def status():
"""Show platform leakage status"""
cmd = ["leakageshow"]
clicommon.run_command(cmd)
@leak.command('control-policy')
def leak_control_policy():
    """Show leak control policy configuration"""
    # (field, value printed when the field is absent), in display order.
    field_defaults = (
        ('system_leak_policy', 'enabled'),
        ('system_critical_leak_action', 'power_off'),
        ('system_minor_leak_action', 'syslog_only'),
        ('rack_mgr_leak_policy', 'enabled'),
        ('rack_mgr_critical_alert_action', 'syslog_only'),
        ('rack_mgr_minor_alert_action', 'syslog_only'),
    )
    try:
        from utilities_common.db import Db
        policy = Db().cfgdb.get_entry(LEAK_CONTROL_POLICY_TABLE, LEAK_CONTROL_POLICY_KEY)
        for field, default in field_defaults:
            click.echo(" {} : {}".format(field, policy.get(field, default)))
    except Exception as e:
        # Best effort: report the failure on stderr instead of a traceback.
        click.echo(f"Error: Failed to retrieve leak control policy: {e}", err=True)


# Parent group for 'rack-manager' subcommands; the docstring is the help text.
@leak.group('rack-manager')
def leak_rack_manager():
    """Show rack-manager leak information"""


@leak_rack_manager.command('alerts')
def leak_rack_manager_alerts():
    """Show rack-manager alerts"""
    try:
        conn = _get_state_db()
        alert_keys = conn.keys(conn.STATE_DB, f"{RACK_MANAGER_ALERT_TABLE}|*") or []

        table = []
        for alert_key in sorted(alert_keys):
            # Keys are '<table>|<alert name>'; everything after the first '|'
            # is the alert name.
            alert_name = alert_key.split('|', 1)[1]
            fields = conn.get_all(conn.STATE_DB, alert_key) or {}
            # 'severity' wins; 'leak' is the fallback field, then 'N/A'.
            fallback = fields.get('leak', 'N/A')
            table.append((
                alert_name,
                fields.get('severity', fallback),
                fields.get('timestamp', 'N/A'),
            ))

        if not table:
            click.echo("No rack-manager alerts found")
        else:
            click.echo(tabulate(table, ['Alert', 'Severity', 'Timestamp'], tablefmt='simple'))
    except Exception as e:
        # Best effort: report the failure on stderr instead of a traceback.
        click.echo(f"Error: Failed to retrieve rack-manager leak alerts: {e}", err=True)


@leak.command('profiles')
def leak_profiles():
    """Show leak sensor profiles"""
    try:
        from utilities_common.db import Db
        cfgdb = Db().cfgdb
        sensor_types = cfgdb.get_keys(LEAK_PROFILE_TABLE) or []

        # One row per sensor type; a missing duration is shown as 'N/A'.
        table = [
            (
                sensor_type,
                cfgdb.get_entry(LEAK_PROFILE_TABLE, sensor_type).get('max_minor_duration_sec', 'N/A'),
            )
            for sensor_type in sorted(sensor_types)
        ]

        if not table:
            click.echo("No leak profiles found")
        else:
            click.echo(tabulate(table, ['Sensor-Type', 'Max-Minor-Duration-Sec'], tablefmt='simple'))
    except Exception as e:
        # Best effort: report the failure on stderr instead of a traceback.
        click.echo(f"Error: Failed to retrieve leak sensor profiles: {e}", err=True)


@leak.command('status')
def leak_status():
    """Show leak sensor status"""
    columns = ['Name', 'Leak', 'Leak-sensor-status', 'leak-sensor-type', 'leak-severity']
    try:
        conn = _get_state_db()
        sensor_keys = conn.keys(conn.STATE_DB, f"{LIQUID_COOLING_INFO_TABLE}|*") or []

        table = []
        for sensor_key in sorted(sensor_keys):
            fields = conn.get_all(conn.STATE_DB, sensor_key) or {}
            # Fall back to the key suffix when the row has no 'name' field.
            key_suffix = sensor_key.split('|', 1)[1]
            name = fields.get('name', key_suffix)
            leaking = fields.get('leaking', 'N/A')
            sensor_status = fields.get('leak_sensor_status', 'N/A')
            sensor_type = fields.get('type', 'N/A')
            # Severity is only reported for a sensor that is currently leaking.
            # NOTE(review): the non-leaking placeholder is 'NA', unlike the
            # 'N/A' used for missing fields - confirm this is intentional.
            if leaking.upper() in ('YES', 'TRUE'):
                severity = fields.get('leak_severity', 'N/A')
            else:
                severity = 'NA'
            table.append((name, leaking, sensor_status, sensor_type, severity))

        if not table:
            click.echo("No leak sensor data found")
        else:
            click.echo(tabulate(table, columns, tablefmt='simple'))
    except Exception as e:
        # Best effort: report the failure on stderr instead of a traceback.
        click.echo(f"Error: Failed to retrieve leak sensor status: {e}", err=True)
Loading
Loading