Skip to content

Commit

Permalink
Disable systemd auto-restart of dependent services for spineRouters (#…
Browse files Browse the repository at this point in the history
…17203)

Currently hostcfgd script overrides the systemd service files of the features depending upon auto_restart enable/disable.
I am skipping dependent features(syncd, gbsyncd for now) to have "RESTART=Always"
for them to not start immediately, and instead get started by SWSS through swss.sh script.
The issue of syncd double stop is also applicable to pizza box platforms, however no traffic impact is seen there, whereas on VOQ chassis, we do see traffic impact due to early start of syncd service.
  • Loading branch information
deepak-singhal0408 authored Nov 20, 2023
1 parent f59dc50 commit 894046e
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 19 deletions.
15 changes: 14 additions & 1 deletion src/sonic-host-services/scripts/hostcfgd
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,20 @@ class FeatureHandler(object):
Returns:
None.
"""
restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no"
# As per the current code(due to various dependencies) SWSS service stop/start also stops/starts the dependent services(syncd, teamd, bgpd etc)
# There is an issue seen of syncd service getting stopped twice upon a critical process crash in syncd service due to above reason.
# Also early start of syncd service has traffic impact on VOQ chassis.
# to fix the above issue, we are disabling the auto restart of syncd service as it will be started by swss service.
# This change can be extended to other dependent services as well in future and also on pizza box platforms.

device_type = self._device_config.get('DEVICE_METADATA', {}).get('localhost', {}).get('type')
is_dependent_service = feature_config.name in ['syncd', 'gbsyncd']
if device_type == 'SpineRouter' and is_dependent_service:
syslog.syslog(syslog.LOG_INFO, "Skipped setting Restart field in systemd for {}".format(feature_config.name))
restart_field_str = "no"
else:
restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no"

feature_systemd_config = "[Service]\nRestart={}\n".format(restart_field_str)
feature_names, feature_suffixes = self.get_multiasic_feature_instances(feature_config)

Expand Down
16 changes: 11 additions & 5 deletions src/sonic-host-services/tests/hostcfgd/hostcfgd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def checks_config_table(self, feature_table, expected_table):

return True if not ddiff else False

def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=None):
def checks_systemd_config_file(self, device_type, feature_table, feature_systemd_name_map=None):
"""Checks whether the systemd configuration file of each feature was created or not
and whether the `Restart=` field in the file is set correctly or not.
Expand All @@ -62,6 +62,7 @@ def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=Non
'auto_restart.conf')

for feature_name in feature_table:
is_dependent_feature = True if feature_name in ['syncd', 'gbsyncd'] else False
auto_restart_status = feature_table[feature_name].get('auto_restart', 'disabled')
if "enabled" in auto_restart_status:
auto_restart_status = "enabled"
Expand All @@ -77,7 +78,10 @@ def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=Non

with open(feature_systemd_config_file_path) as systemd_config_file:
status = systemd_config_file.read().strip()
assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status])
if device_type == 'SpineRouter' and is_dependent_feature:
assert status == '[Service]\nRestart=no'
else:
assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status])

def get_state_db_set_calls(self, feature_table):
"""Returns a Mock call objects which recorded the `set` calls to `FEATURE` table in `STATE_DB`.
Expand Down Expand Up @@ -134,6 +138,7 @@ def test_sync_state_field(self, test_scenario_name, config_data, fs):
device_config = {}
device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA']
device_config.update(config_data['device_runtime_metadata'])
device_type = MockConfigDb.CONFIG_DB['DEVICE_METADATA']['localhost']['type']

feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config)
feature_table = MockConfigDb.CONFIG_DB['FEATURE']
Expand All @@ -158,13 +163,13 @@ def test_sync_state_field(self, test_scenario_name, config_data, fs):

feature_table_state_db_calls = self.get_state_db_set_calls(feature_table)

self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map)
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)
mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'],
any_order=True)
mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'],
any_order=True)
feature_state_table_mock.set.assert_has_calls(feature_table_state_db_calls)
self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map)
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)

@parameterized.expand(HOSTCFGD_TEST_VECTOR)
@patchfs
Expand Down Expand Up @@ -196,6 +201,7 @@ def test_handler(self, test_scenario_name, config_data, fs):
device_config = {}
device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA']
device_config.update(config_data['device_runtime_metadata'])
device_type = MockConfigDb.CONFIG_DB['DEVICE_METADATA']['localhost']['type']
feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config)

feature_table = MockConfigDb.CONFIG_DB['FEATURE']
Expand All @@ -207,7 +213,7 @@ def test_handler(self, test_scenario_name, config_data, fs):
feature_names, _ = feature_handler.get_multiasic_feature_instances(feature)
feature_systemd_name_map[feature_name] = feature_names

self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map)
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)
mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'],
any_order=True)
mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'],
Expand Down
111 changes: 98 additions & 13 deletions src/sonic-host-services/tests/hostcfgd/test_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,8 +580,14 @@
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},


"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
Expand Down Expand Up @@ -610,12 +616,23 @@
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
call("sudo systemctl stop bgp.service", shell=True),
call("sudo systemctl disable bgp.service", shell=True),
call("sudo systemctl mask bgp.service", shell=True),
call("sudo systemctl start syncd.service", shell=True),
call("sudo systemctl enable syncd.service", shell=True),
call("sudo systemctl unmask syncd.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
Expand Down Expand Up @@ -675,8 +692,14 @@
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},


"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
Expand Down Expand Up @@ -705,12 +728,23 @@
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
call("sudo systemctl stop bgp.service", shell=True),
call("sudo systemctl disable bgp.service", shell=True),
call("sudo systemctl mask bgp.service", shell=True),
call("sudo systemctl start syncd.service", shell=True),
call("sudo systemctl enable syncd.service", shell=True),
call("sudo systemctl unmask syncd.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
Expand Down Expand Up @@ -770,8 +804,14 @@
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},


"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
Expand Down Expand Up @@ -800,6 +840,14 @@
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
Expand All @@ -809,7 +857,9 @@
call("sudo systemctl start teamd.service", shell=True),
call("sudo systemctl enable teamd.service", shell=True),
call("sudo systemctl unmask teamd.service", shell=True),

call("sudo systemctl start syncd.service", shell=True),
call("sudo systemctl enable syncd.service", shell=True),
call("sudo systemctl unmask syncd.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
Expand Down Expand Up @@ -869,8 +919,14 @@
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},


"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
Expand Down Expand Up @@ -899,6 +955,14 @@
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
Expand All @@ -908,7 +972,9 @@
call("sudo systemctl start teamd.service", shell=True),
call("sudo systemctl enable teamd.service", shell=True),
call("sudo systemctl unmask teamd.service", shell=True),

call("sudo systemctl start syncd.service", shell=True),
call("sudo systemctl enable syncd.service", shell=True),
call("sudo systemctl unmask syncd.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
Expand Down Expand Up @@ -969,8 +1035,14 @@
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},


"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "False",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
Expand Down Expand Up @@ -999,6 +1071,14 @@
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "False",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
Expand All @@ -1020,7 +1100,12 @@
call("sudo systemctl stop lldp@1.service", shell=True),
call("sudo systemctl disable lldp@1.service", shell=True),
call("sudo systemctl mask lldp@1.service", shell=True),

call("sudo systemctl start syncd@0.service", shell=True),
call("sudo systemctl enable syncd@0.service", shell=True),
call("sudo systemctl unmask syncd@0.service", shell=True),
call("sudo systemctl start syncd@1.service", shell=True),
call("sudo systemctl enable syncd@1.service", shell=True),
call("sudo systemctl unmask syncd@1.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
Expand Down

0 comments on commit 894046e

Please sign in to comment.