Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PSU power threshold] Fix logic error: compare the system power with the PSU's power threshold #367

Merged
Merged 2 commits on Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions sonic-psud/scripts/psud
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ class DaemonPsud(daemon_base.DaemonBase):
self.psu_tbl = None
self.psu_chassis_info = None
self.first_run = True
self.psu_threshold_exceeded_logged = False

global platform_psuutil
global platform_chassis
Expand Down Expand Up @@ -458,6 +459,7 @@ class DaemonPsud(daemon_base.DaemonBase):
if not platform_chassis:
return

self.psu_threshold_exceeded_logged = False
for index, psu in enumerate(platform_chassis.get_all_psus()):
try:
self._update_single_psu_data(index + 1, psu)
Expand Down Expand Up @@ -535,25 +537,37 @@ class DaemonPsud(daemon_base.DaemonBase):
power_warning_suppress_threshold = try_get(psu.get_psu_power_warning_suppress_threshold, NOT_AVAILABLE)
power_critical_threshold = try_get(psu.get_psu_power_critical_threshold, NOT_AVAILABLE)
if psu_status.check_psu_power_threshold:
# Calculate total power
system_power = float(power)
for _, other_psu in enumerate(platform_chassis.get_all_psus()):
if other_psu is psu:
# Skip the current PSU
continue
power_str = try_get(other_psu.get_power, NOT_AVAILABLE)
if power_str != NOT_AVAILABLE:
system_power += float(power_str)

if power_warning_suppress_threshold == NOT_AVAILABLE or power_critical_threshold == NOT_AVAILABLE:
self.log_error("PSU power thresholds become invalid: threshold {} critical threshold {}".format(power_warning_suppress_threshold, power_critical_threshold))
psu_status.check_psu_power_threshold = False
psu_status.power_exceeded_threshold = False
elif psu_status.power_exceeded_threshold:
# The falling threshold is the warning threshold
if power < power_warning_suppress_threshold:
if system_power < power_warning_suppress_threshold:
# Clear alarm
power_exceeded_threshold = False
else:
# The rising threshold is the critical threshold
if power >= power_critical_threshold:
if system_power >= power_critical_threshold:
# Raise alarm
power_exceeded_threshold = True

if psu_status.set_power_exceed_threshold(power_exceeded_threshold):
if psu_status.set_power_exceed_threshold(power_exceeded_threshold) and not self.psu_threshold_exceeded_logged:
# Since this is a system level PSU power exceeding check, we do not need to log it for each PSU
log_on_status_changed(self, not psu_status.power_exceeded_threshold,
'PSU power warning cleared: {} power {} is back to normal.'.format(name, power),
'PSU power warning: {} power {} exceeds critical threshold {}.'.format(name, power, power_critical_threshold))
'PSU power warning cleared: system power {} is back to normal, below the warning suppress threshold {}.'.format(system_power, power_warning_suppress_threshold),
'PSU power warning: system power {} exceeds the critical threshold {}.'.format(system_power, power_critical_threshold))
self.psu_threshold_exceeded_logged = True

if presence and psu_status.set_voltage(voltage, voltage_high_threshold, voltage_low_threshold):
set_led = True
Expand Down
21 changes: 12 additions & 9 deletions sonic-psud/tests/test_DaemonPsud.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,19 +188,22 @@ def test_power_threshold(self):
psu = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234')
psud.platform_chassis = MockChassis()
psud.platform_chassis._psu_list.append(psu)
another_psu = MockPsu('PSU 2', 0, True, 'Fake Model', '12345678', '1234')
another_psu.set_power(10.0)
psud.platform_chassis._psu_list.append(another_psu)

daemon_psud = psud.DaemonPsud(SYSLOG_IDENTIFIER)

daemon_psud.psu_tbl = mock.MagicMock()
psu.get_psu_power_critical_threshold = mock.MagicMock(return_value=120.0)
psu.get_psu_power_warning_suppress_threshold = mock.MagicMock(return_value=110.0)
psu.get_psu_power_critical_threshold = mock.MagicMock(return_value=130.0)
psu.get_psu_power_warning_suppress_threshold = mock.MagicMock(return_value=120.0)

# Normal start. All good and all thresholds are supported
# Power is in normal range (below warning threshold)
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(100.0, 110.0, 120.0, False)
expected_fvp = self._construct_expected_fvp(100.0, 120.0, 130.0, False)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -213,7 +216,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(115.0, 110.0, 120.0, False)
expected_fvp = self._construct_expected_fvp(115.0, 120.0, 130.0, False)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -224,7 +227,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(125.0, 110.0, 120.0, True)
expected_fvp = self._construct_expected_fvp(125.0, 120.0, 130.0, True)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -235,7 +238,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(115.0, 110.0, 120.0, True)
expected_fvp = self._construct_expected_fvp(115.0, 120.0, 130.0, True)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -246,7 +249,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(105.0, 110.0, 120.0, False)
expected_fvp = self._construct_expected_fvp(105.0, 120.0, 130.0, False)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
daemon_psud._update_led_color()
Expand All @@ -257,7 +260,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(125.0, 110.0, 120.0, True)
expected_fvp = self._construct_expected_fvp(125.0, 120.0, 130.0, True)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -268,7 +271,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(105.0, 110.0, 120.0, False)
expected_fvp = self._construct_expected_fvp(105.0, 120.0, 130.0, False)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand Down