Skip to content

Commit

Permalink
Update for the procedures for insertion/hot swap of Switch Fabric Module (SFM) by using "config chassis modules shutdown/startup" commands (sonic-net#3283)
Browse files Browse the repository at this point in the history

sudo config chassis modules shutdown/startup <module name>

The HLD for Shutdown and Startup of the Fabric Module is below:
sonic-net/SONiC#1694
  • Loading branch information
JunhongMao authored and arfeigin committed Jun 16, 2024
1 parent 67e044c commit 6858d5f
Show file tree
Hide file tree
Showing 2 changed files with 213 additions and 1 deletion.
101 changes: 100 additions & 1 deletion config/chassis_modules.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
#!/usr/sbin/env python

import click

import time
import re
import subprocess
import utilities_common.cli as clicommon

TIMEOUT_SECS = 10


#
# 'chassis_modules' group ('config chassis_modules ...')
#
Expand All @@ -17,6 +22,81 @@ def modules():
"""Configure chassis modules"""
pass


def get_config_module_state(db, chassis_module_name):
    """Return the configured admin status of a chassis module.

    The module's entry is read from the ``CHASSIS_MODULE`` table through
    ``db.cfgdb``.  A missing or empty entry is reported as ``'up'``.  An
    entry that exists but has no ``admin_status`` field is reported as
    ``'up'`` as well instead of raising ``KeyError``.

    Args:
        db: object exposing the config DB connector as ``db.cfgdb``.
        chassis_module_name: key of the module in ``CHASSIS_MODULE``.

    Returns:
        The stored ``admin_status`` value, or ``'up'`` when none is stored.
    """
    fvs = db.cfgdb.get_entry('CHASSIS_MODULE', chassis_module_name)
    if not fvs:
        return 'up'
    # An entry without an explicit admin_status is treated like a missing one.
    return fvs.get('admin_status', 'up')


#
# Name: check_config_module_state_with_timeout
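# return: False once the module reaches the requested state; on timeout ctx.fail() is called (True is returned only if ctx.fail() itself returns)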
#
def check_config_module_state_with_timeout(ctx, db, chassis_module_name, state):
    """Poll the configured module state until it equals ``state``.

    The state is read with get_config_module_state() once per second.  As
    soon as it matches, False is returned.  After TIMEOUT_SECS unsuccessful
    polls ``ctx.fail()`` is invoked with a timeout message; True is returned
    only if that call returns.

    Args:
        ctx: context object whose ``fail(message)`` reports the timeout.
        db: database wrapper handed through to get_config_module_state().
        chassis_module_name: name of the module being polled.
        state: the admin status value to wait for.

    Returns:
        False when the state was reached, True after a reported timeout.
    """
    elapsed_secs = 0
    while True:
        if get_config_module_state(db, chassis_module_name) == state:
            return False
        # Not there yet: wait one second before looking again.
        time.sleep(1)
        elapsed_secs += 1
        if elapsed_secs >= TIMEOUT_SECS:
            ctx.fail("get_config_module_state {} timeout".format(chassis_module_name))
            return True


def get_asic_list_from_db(chassisdb, chassis_module_name):
    """Collect the ASIC ids that belong to a chassis module.

    Every ``CHASSIS_FABRIC_ASIC_TABLE*`` key in ``CHASSIS_STATE_DB`` whose
    ``name`` field equals ``chassis_module_name`` contributes the decimal
    number at the end of its key (e.g. ``...|asic6`` -> ``6``).

    Keys that do not end in a number are skipped instead of raising
    ``AttributeError``.  A ``None`` key list is treated as empty.

    Args:
        chassisdb: DB connector providing ``keys(db, pattern)`` and
            ``get(db, key, field)``.
        chassis_module_name: module name to match against the ``name`` field.

    Returns:
        List of integer ASIC ids, in the order the keys were returned.
    """
    asic_list = []
    # Defensive: tolerate a connector that yields None when nothing matches.
    asics_keys_list = chassisdb.keys("CHASSIS_STATE_DB", "CHASSIS_FABRIC_ASIC_TABLE*") or []
    for asic_key in asics_keys_list:
        if chassisdb.get("CHASSIS_STATE_DB", asic_key, "name") != chassis_module_name:
            continue
        trailing_number = re.search(r"(\d+)$", asic_key)
        if trailing_number is None:
            # Key carries no ASIC number; nothing usable to report.
            continue
        asic_list.append(int(trailing_number.group()))
    return asic_list


#
# Syntax: fabric_module_set_admin_status <chassis_module_name> <'up'/'down'>
#
def fabric_module_set_admin_status(db, chassis_module_name, state):
    """Stop or start the swss services of every ASIC on a fabric module.

    The ASIC ids are looked up in CHASSIS_STATE_DB with
    get_asic_list_from_db(); when none are found nothing is done.

    state == "down":
        1. stop swss@<asic>.service for every ASIC,
        2. check with "systemctl is-active" that each one really stopped and
           give up (leaving the DB untouched) if any is still active,
        3. delete the ASICs' CHASSIS_FABRIC_ASIC_TABLE entries,
        4. reset the systemd failure counters and start the services again.
    state == "up":
        start swss@<asic>.service for every ASIC.
    Any other value of ``state`` is ignored.

    Args:
        db: database wrapper; ``db.db`` is the connector used for
            CHASSIS_STATE_DB.
        chassis_module_name: name of the fabric module.
        state: requested admin status, "up" or "down".

    Returns:
        None.
    """
    chassisdb = db.db
    chassisdb.connect("CHASSIS_STATE_DB")
    asic_list = get_asic_list_from_db(chassisdb, chassis_module_name)

    if not asic_list:
        return

    # NOTE(review): commands are handed to clicommon.run_command as single
    # strings - confirm that this matches the run_command signature in use.
    if state == "down":
        for asic in asic_list:
            click.echo("Stop swss@{} and peer services".format(asic))
            clicommon.run_command('sudo systemctl stop swss@{}.service'.format(asic))

        # Every service has to be confirmed stopped before its DB entry is
        # removed, so each ASIC is verified individually.
        for asic in asic_list:
            is_active = subprocess.call(["systemctl", "is-active", "--quiet", "swss@{}.service".format(asic)])

            if is_active == 0:  # zero: active, non-zero: inactive
                click.echo("Stop swss@{} and peer services failed".format(asic))
                return

        click.echo("Delete related CHASSIS_FABRIC_ASIC_TABLE entries")

        for asic in asic_list:
            chassisdb.delete("CHASSIS_STATE_DB", "CHASSIS_FABRIC_ASIC_TABLE|asic" + str(asic))

        # Start the services again in case the user only issued
        # "systemctl stop swss@/syncd@" without bringing down the hardware.
        for asic in asic_list:
            # Reset the failure count so systemd's restart limit does not
            # block the start.
            clicommon.run_command('sudo systemctl reset-failed swss@{}.service'.format(asic))
            click.echo("Start swss@{} and peer services".format(asic))
            clicommon.run_command('sudo systemctl start swss@{}.service'.format(asic))
    elif state == "up":
        for asic in asic_list:
            click.echo("Start swss@{} and peer services".format(asic))
            clicommon.run_command('sudo systemctl start swss@{}.service'.format(asic))

#
# 'shutdown' subcommand ('config chassis_modules shutdown ...')
#
Expand All @@ -33,8 +113,17 @@ def shutdown_chassis_module(db, chassis_module_name):
not chassis_module_name.startswith("FABRIC-CARD"):
ctx.fail("'module_name' has to begin with 'SUPERVISOR', 'LINE-CARD' or 'FABRIC-CARD'")

# To avoid duplicate operation
if get_config_module_state(db, chassis_module_name) == 'down':
click.echo("Module {} is already in down state".format(chassis_module_name))
return

click.echo("Shutting down chassis module {}".format(chassis_module_name))
fvs = {'admin_status': 'down'}
config_db.set_entry('CHASSIS_MODULE', chassis_module_name, fvs)
if chassis_module_name.startswith("FABRIC-CARD"):
if not check_config_module_state_with_timeout(ctx, db, chassis_module_name, 'down'):
fabric_module_set_admin_status(db, chassis_module_name, 'down')

#
# 'startup' subcommand ('config chassis_modules startup ...')
Expand All @@ -45,5 +134,15 @@ def shutdown_chassis_module(db, chassis_module_name):
def startup_chassis_module(db, chassis_module_name):
    """Chassis-module startup of module"""
    cfgdb = db.cfgdb
    click_ctx = click.get_current_context()

    # Skip the whole operation when the module is already configured 'up'.
    current_state = get_config_module_state(db, chassis_module_name)
    if current_state == 'up':
        click.echo("Module {} is already set to up state".format(chassis_module_name))
        return

    click.echo("Starting up chassis module {}".format(chassis_module_name))
    # Writes None for the module's CHASSIS_MODULE entry
    # (presumably this removes the entry - confirm against the config DB API).
    cfgdb.set_entry('CHASSIS_MODULE', chassis_module_name, None)

    # Only fabric cards need their per-ASIC services handled.
    if not chassis_module_name.startswith("FABRIC-CARD"):
        return

    timed_out = check_config_module_state_with_timeout(click_ctx, db, chassis_module_name, 'up')
    if not timed_out:
        fabric_module_set_admin_status(db, chassis_module_name, 'up')
113 changes: 113 additions & 0 deletions tests/chassis_modules_test.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import tests.mock_tables.dbconnector
from utilities_common.db import Db
from .utils import get_result_and_return_code
from unittest import mock
sys.modules['clicommon'] = mock.Mock()

show_linecard0_shutdown_output="""\
LINE-CARD0 line-card 1 Empty down LC1000101
Expand All @@ -15,6 +17,15 @@
show_linecard0_startup_output="""\
LINE-CARD0 line-card 1 Empty up LC1000101
"""

show_fabriccard0_shutdown_output = """\
FABRIC-CARD0 fabric-card 17 Online down FC1000101
"""

show_fabriccard0_startup_output = """\
FABRIC-CARD0 fabric-card 17 Online up FC1000101
"""

header_lines = 2
warning_lines = 0

Expand Down Expand Up @@ -113,6 +124,11 @@
Linecard4|Asic2|PortChannel0001 2 22 Linecard4|Asic2|Ethernet29, Linecard4|Asic2|Ethernet30
"""


def mock_run_command_side_effect(*args, **kwargs):
    """Side effect for the patched run_command: ignore all arguments and
    return an empty output string together with 0."""
    output, return_code = '', 0
    return output, return_code


class TestChassisModules(object):
@classmethod
def setup_class(cls):
Expand Down Expand Up @@ -186,6 +202,47 @@ def test_config_shutdown_module(self):
#db.cfgdb.set_entry("CHASSIS_MODULE", "LINE-CARD0", { "admin_status" : "down" })
#db.get_data("CHASSIS_MODULE", "LINE-CARD0")

def test_config_shutdown_module_fabric(self):
    """Shut down FABRIC-CARD0 (two ASICs) with run_command patched out,
    check the reported status, then shut it down a second time."""
    run_command_stub = mock.MagicMock(side_effect=mock_run_command_side_effect)
    with mock.patch("utilities_common.cli.run_command", run_command_stub) as mock_run_command:
        runner = CliRunner()
        db = Db()

        # Seed CHASSIS_STATE_DB with two fabric ASICs owned by FABRIC-CARD0.
        state_db = db.db
        state_db.connect("CHASSIS_STATE_DB")
        asic_rows = (
            ("CHASSIS_FABRIC_ASIC_TABLE|asic6", "0", "nokia-bdb:4:0"),
            ("CHASSIS_FABRIC_ASIC_TABLE|asic7", "1", "nokia-bdb:4:1"),
        )
        for asic_key, id_in_module, pci_address in asic_rows:
            state_db.set("CHASSIS_STATE_DB", asic_key, "asic_id_in_module", id_in_module)
            state_db.set("CHASSIS_STATE_DB", asic_key, "asic_pci_address", pci_address)
            state_db.set("CHASSIS_STATE_DB", asic_key, "name", "FABRIC-CARD0")
        state_db.close("CHASSIS_STATE_DB")

        shutdown_cmd = config.config.commands["chassis"].commands["modules"].commands["shutdown"]
        status_cmd = show.cli.commands["chassis"].commands["modules"].commands["status"]
        module_args = ["FABRIC-CARD0"]

        # First shutdown: the module is not yet configured down.
        result = runner.invoke(shutdown_cmd, module_args, obj=db)
        print(result.exit_code)
        print(result.output)
        assert result.exit_code == 0

        result = runner.invoke(status_cmd, module_args, obj=db)
        print(result.exit_code)
        print(result.output)
        result_lines = result.output.strip('\n').split('\n')
        assert result.exit_code == 0
        header_lines = 2
        # Collapse runs of whitespace so the row can be compared verbatim.
        result_out = " ".join((result_lines[header_lines]).split())
        assert result_out.strip('\n') == show_fabriccard0_shutdown_output.strip('\n')

        # Second shutdown while the module is already configured down.
        fvs = {'admin_status': 'down'}
        db.cfgdb.set_entry('CHASSIS_MODULE', "FABRIC-CARD0", fvs)
        result = runner.invoke(shutdown_cmd, module_args, obj=db)
        print(result.exit_code)
        print(result.output)
        assert result.exit_code == 0
        # Total run_command invocations expected across both shutdown calls.
        assert mock_run_command.call_count == 6

def test_config_startup_module(self):
runner = CliRunner()
db = Db()
Expand All @@ -202,6 +259,62 @@ def test_config_startup_module(self):
result_out = " ".join((result_lines[header_lines]).split())
assert result_out.strip('\n') == show_linecard0_startup_output.strip('\n')

def test_config_startup_module_fabric(self):
    """Start up FABRIC-CARD0 (two ASICs) with run_command patched out,
    first from a configured 'down' state and then from 'up'."""
    run_command_stub = mock.MagicMock(side_effect=mock_run_command_side_effect)
    with mock.patch("utilities_common.cli.run_command", run_command_stub) as mock_run_command:
        runner = CliRunner()
        db = Db()

        # Seed CHASSIS_STATE_DB with two fabric ASICs owned by FABRIC-CARD0.
        state_db = db.db
        state_db.connect("CHASSIS_STATE_DB")
        asic_rows = (
            ("CHASSIS_FABRIC_ASIC_TABLE|asic6", "0", "nokia-bdb:4:0"),
            ("CHASSIS_FABRIC_ASIC_TABLE|asic7", "1", "nokia-bdb:4:1"),
        )
        for asic_key, id_in_module, pci_address in asic_rows:
            state_db.set("CHASSIS_STATE_DB", asic_key, "asic_id_in_module", id_in_module)
            state_db.set("CHASSIS_STATE_DB", asic_key, "asic_pci_address", pci_address)
            state_db.set("CHASSIS_STATE_DB", asic_key, "name", "FABRIC-CARD0")
        state_db.close("CHASSIS_STATE_DB")

        startup_cmd = config.config.commands["chassis"].commands["modules"].commands["startup"]
        status_cmd = show.cli.commands["chassis"].commands["modules"].commands["status"]
        module_args = ["FABRIC-CARD0"]

        # Pass 1: FC is down and doing startup.
        # Pass 2: FC is up and doing startup.
        for initial_status in ('down', 'up'):
            fvs = {'admin_status': initial_status}
            db.cfgdb.set_entry('CHASSIS_MODULE', "FABRIC-CARD0", fvs)

            result = runner.invoke(startup_cmd, module_args, obj=db)
            print(result.exit_code)
            print(result.output)
            assert result.exit_code == 0

            result = runner.invoke(status_cmd, module_args, obj=db)
            print(result.exit_code)
            print(result.output)
            result_lines = result.output.strip('\n').split('\n')
            assert result.exit_code == 0
            # Collapse runs of whitespace so the row can be compared verbatim.
            result_out = " ".join((result_lines[header_lines]).split())
            assert result_out.strip('\n') == show_fabriccard0_startup_output.strip('\n')
            # The cumulative count is expected to be 2 after either pass.
            assert mock_run_command.call_count == 2

def test_config_incorrect_module(self):
runner = CliRunner()
db = Db()
Expand Down

0 comments on commit 6858d5f

Please sign in to comment.