Skip to content

Commit

Permalink
Adjust the logic to check PSU power threshold
Browse files Browse the repository at this point in the history
Check the total system power (the sum across all PSUs), instead of a single PSU's power, against a single PSU's threshold

Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs committed Jun 1, 2023
1 parent 4f9ea2d commit 34c9d4a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 13 deletions.
18 changes: 14 additions & 4 deletions sonic-psud/scripts/psud
Original file line number Diff line number Diff line change
Expand Up @@ -535,25 +535,35 @@ class DaemonPsud(daemon_base.DaemonBase):
power_warning_suppress_threshold = try_get(psu.get_psu_power_warning_suppress_threshold, NOT_AVAILABLE)
power_critical_threshold = try_get(psu.get_psu_power_critical_threshold, NOT_AVAILABLE)
if psu_status.check_psu_power_threshold:
# Calculate total power
system_power = float(power)
for _, other_psu in enumerate(platform_chassis.get_all_psus()):
if other_psu is psu:
# Skip the current PSU
continue
power_str = try_get(other_psu.get_power, NOT_AVAILABLE)
if power_str != NOT_AVAILABLE:
system_power += float(power_str)

if power_warning_suppress_threshold == NOT_AVAILABLE or power_critical_threshold == NOT_AVAILABLE:
self.log_error("PSU power thresholds become invalid: threshold {} critical threshold {}".format(power_warning_suppress_threshold, power_critical_threshold))
psu_status.check_psu_power_threshold = False
psu_status.power_exceeded_threshold = False
elif psu_status.power_exceeded_threshold:
# The falling threshold is the warning threshold
if power < power_warning_suppress_threshold:
if system_power < power_warning_suppress_threshold:
# Clear alarm
power_exceeded_threshold = False
else:
# The rising threshold is the critical threshold
if power >= power_critical_threshold:
if system_power >= power_critical_threshold:
# Raise alarm
power_exceeded_threshold = True

if psu_status.set_power_exceed_threshold(power_exceeded_threshold):
log_on_status_changed(self, not psu_status.power_exceeded_threshold,
'PSU power warning cleared: {} power {} is back to normal.'.format(name, power),
'PSU power warning: {} power {} exceeds critical threshold {}.'.format(name, power, power_critical_threshold))
'PSU power warning cleared: system power {} is back to normal, below {} warning suppress threshold {}.'.format(system_power, name, power_warning_suppress_threshold),
'PSU power warning: system power {} exceeds {} critical threshold {}.'.format(system_power, name, power_critical_threshold))

if presence and psu_status.set_voltage(voltage, voltage_high_threshold, voltage_low_threshold):
set_led = True
Expand Down
21 changes: 12 additions & 9 deletions sonic-psud/tests/test_DaemonPsud.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,19 +188,22 @@ def test_power_threshold(self):
psu = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234')
psud.platform_chassis = MockChassis()
psud.platform_chassis._psu_list.append(psu)
another_psu = MockPsu('PSU 2', 0, True, 'Fake Model', '12345678', '1234')
another_psu.set_power(10.0)
psud.platform_chassis._psu_list.append(another_psu)

daemon_psud = psud.DaemonPsud(SYSLOG_IDENTIFIER)

daemon_psud.psu_tbl = mock.MagicMock()
psu.get_psu_power_critical_threshold = mock.MagicMock(return_value=120.0)
psu.get_psu_power_warning_suppress_threshold = mock.MagicMock(return_value=110.0)
psu.get_psu_power_critical_threshold = mock.MagicMock(return_value=130.0)
psu.get_psu_power_warning_suppress_threshold = mock.MagicMock(return_value=120.0)

# Normal start. All good and all thresholds are supported
# Power is in normal range (below warning threshold)
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(100.0, 110.0, 120.0, False)
expected_fvp = self._construct_expected_fvp(100.0, 120.0, 130.0, False)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -213,7 +216,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(115.0, 110.0, 120.0, False)
expected_fvp = self._construct_expected_fvp(115.0, 120.0, 130.0, False)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -224,7 +227,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(125.0, 110.0, 120.0, True)
expected_fvp = self._construct_expected_fvp(125.0, 120.0, 130.0, True)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -235,7 +238,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(115.0, 110.0, 120.0, True)
expected_fvp = self._construct_expected_fvp(115.0, 120.0, 130.0, True)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -246,7 +249,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(105.0, 110.0, 120.0, False)
expected_fvp = self._construct_expected_fvp(105.0, 120.0, 130.0, False)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
daemon_psud._update_led_color()
Expand All @@ -257,7 +260,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(125.0, 110.0, 120.0, True)
expected_fvp = self._construct_expected_fvp(125.0, 120.0, 130.0, True)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand All @@ -268,7 +271,7 @@ def test_power_threshold(self):
daemon_psud._update_single_psu_data(1, psu)
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
expected_fvp = self._construct_expected_fvp(105.0, 110.0, 120.0, False)
expected_fvp = self._construct_expected_fvp(105.0, 120.0, 130.0, False)
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
daemon_psud._update_led_color()
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
Expand Down

0 comments on commit 34c9d4a

Please sign in to comment.