Skip to content

Commit

Permalink
[Mellanox] add test cases for dynamic minimum fan speed and psu fan s…
Browse files Browse the repository at this point in the history
…peed policy (#1552)
  • Loading branch information
Junchao-Mellanox authored Apr 29, 2020
1 parent 6d1582c commit 7c988f7
Show file tree
Hide file tree
Showing 5 changed files with 280 additions and 1 deletion.
3 changes: 3 additions & 0 deletions tests/platform/files/valid_policy.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
Expand Down
47 changes: 47 additions & 0 deletions tests/platform/mellanox/mellanox_thermal_control_test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
from thermal_control_test_helper import *
from common.mellanox_data import SWITCH_MODELS
from minimum_table import MINIMUM_TABLE

NOT_AVAILABLE = 'N/A'

Expand Down Expand Up @@ -899,3 +900,49 @@ def mock_normal_speed(self):
self.fan_data.mock_speed(AbnormalFanMocker.TARGET_SPEED_VALUE)
self.fan_data.mock_target_speed(AbnormalFanMocker.TARGET_SPEED_VALUE)
self.expect_led_color = 'green'


@mocker('MinTableMocker')
class MinTableMocker(object):
    """
    Mocker for the inputs that select a row and entry of the minimum cooling table.

    It writes mocked values for the fan ambient temperature, the port ambient
    temperature and the module temperature fault flag through MockerHelper, and
    computes the cooling level the test should expect from MINIMUM_TABLE.
    """
    # Thermal file names passed to MockerHelper.mock_thermal_value.
    FAN_AMB_PATH = 'fan_amb'
    PORT_AMB_PATH = 'port_amb'
    TRUST_PATH = 'module1_temp_fault'

    def __init__(self, dut):
        """
        Constructor of MinTableMocker.
        :param dut: DUT object, forwarded to MockerHelper.
        """
        self.mock_helper = MockerHelper(dut)

    def get_expect_cooling_level(self, air_flow_dir, temperature, trust_state):
        """
        Look up the expected minimum cooling level for the given conditions.
        :param air_flow_dir: Air flow direction used as row prefix ('p2c', 'c2p' or 'unk').
        :param temperature: Temperature in 1/1000 of the unit used by the table ranges.
        :param trust_state: True selects the '<dir>_trust' row, False the '<dir>_untrust' row.
        :return: Table value minus 10, or None if no range contains the temperature.
        :raises KeyError: If the DUT hwsku or the row name is not in MINIMUM_TABLE.
        """
        hwsku = self.mock_helper.dut.facts["hwsku"]
        minimum_table = MINIMUM_TABLE[hwsku]
        row = minimum_table['{}_{}'.format(air_flow_dir, 'trust' if trust_state else 'untrust')]
        # Floor division keeps the value an integer on both Python 2 and 3.
        # With true division a value such as 40500 would become 40.5, which
        # lies between the integer ranges "-127:40" and "41:120" and would
        # match nothing.
        temperature = temperature // 1000
        for range_str, cooling_level in row.items():
            # Each key has the form "<min>:<max>", both bounds inclusive.
            range_str_list = range_str.split(':')
            min_temp = int(range_str_list[0])
            max_temp = int(range_str_list[1])
            if min_temp <= temperature <= max_temp:
                # The table stores values that are 10 above the level returned here.
                return cooling_level - 10

        return None

    def mock_min_table(self, air_flow_dir, temperature, trust_state):
        """
        Mock the thermal inputs for the given air flow direction, temperature and trust state.
        :param air_flow_dir: 'p2c', 'c2p', or any other value for equal fan/port temperatures.
        :param temperature: Temperature value to write (same scale as get_expect_cooling_level).
        :param trust_state: True writes fault value '0', False writes '1'.
        :return:
        """
        # TRUST_PATH is a fault flag, so a trusted state is written as '0'.
        trust_value = '0' if trust_state else '1'
        if air_flow_dir == 'p2c':
            # Fan ambient gets the requested value, port ambient is 100 lower.
            fan_temp = temperature
            port_temp = temperature - 100
        elif air_flow_dir == 'c2p':
            # Port ambient gets the requested value, fan ambient is 100 lower.
            fan_temp = temperature - 100
            port_temp = temperature
        else:
            # Any other direction: both sensors report the same value.
            fan_temp = temperature
            port_temp = temperature

        self.mock_helper.mock_thermal_value(self.FAN_AMB_PATH, str(fan_temp))
        self.mock_helper.mock_thermal_value(self.PORT_AMB_PATH, str(port_temp))
        self.mock_helper.mock_thermal_value(self.TRUST_PATH, str(trust_value))

    def deinit(self):
        """
        Destructor of MinTableMocker.
        :return:
        """
        self.mock_helper.deinit()
90 changes: 90 additions & 0 deletions tests/platform/mellanox/minimum_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
def _sn2700_family_table():
    """Return a fresh copy of the table shared by the SN2700-style SKUs."""
    return {
        "p2c_trust": {"-127:40": 13, "41:120": 15},
        "p2c_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
        "c2p_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
        "c2p_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
        "unk_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
        "unk_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
    }


def _sn3800_family_table():
    """Return a fresh copy of the table shared by the SN3800-style SKUs."""
    return {
        "p2c_trust": {"-127:35": 12, "36:120": 13},
        "p2c_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
        "c2p_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
        "c2p_untrust": {"-127:20": 12, "21:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
        "unk_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
        "unk_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
    }


# Per-hwsku minimum tables.
# Row names are "<air flow direction>_<trust|untrust>"; inside a row every key
# is an inclusive "<min>:<max>" range and the value is the entry for that range.
# SKUs with identical data get their own dict from a factory so that no two
# SKUs share a mutable object.
MINIMUM_TABLE = {
    'ACS-MSN2700': _sn2700_family_table(),
    'LS-SN2700': _sn2700_family_table(),
    'ACS-MSN2740': {
        "p2c_trust": {"-127:120": 13},
        "p2c_untrust": {"-127:35": 13, "36:40": 14, "41:120": 15},
        "c2p_trust": {"-127:120": 13},
        "c2p_untrust": {"-127:15": 13, "16:30": 14, "31:35": 15, "36:120": 17},
        "unk_trust": {"-127:120": 13},
        "unk_untrust": {"-127:15": 13, "16:30": 14, "31:35": 15, "36:120": 17},
    },
    'ACS-MSN2410': _sn2700_family_table(),
    'Mellanox-SN2700': _sn2700_family_table(),
    'Mellanox-SN2700-D48C8': _sn2700_family_table(),
    'ACS-MSN2100': {
        "p2c_trust": {"-127:120": 12},
        "p2c_untrust": {"-127:15": 12, "16:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
        "c2p_trust": {"-127:40": 12, "41:120": 13},
        "c2p_untrust": {"-127:40": 12, "41:120": 13},
        "unk_trust": {"-127:40": 12, "41:120": 13},
        "unk_untrust": {"-127:15": 12, "16:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
    },
    'ACS-MSN2010': {
        "p2c_trust": {"-127:120": 12},
        "p2c_untrust": {"-127:15": 12, "16:20": 13, "21:30": 14, "31:35": 15, "36:120": 16},
        "c2p_trust": {"-127:120": 12},
        "c2p_untrust": {"-127:20": 12, "21:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
        "unk_trust": {"-127:120": 12},
        "unk_untrust": {"-127:15": 12, "16:20": 13, "21:30": 14, "31:35": 15, "36:120": 16},
    },
    'ACS-MSN3700': {
        "p2c_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
        "p2c_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
        "c2p_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
        "c2p_untrust": {"-127:25": 12, "26:40": 13, "41:120": 14},
        "unk_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
        "unk_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
    },
    'ACS-MSN3800': _sn3800_family_table(),
    'Mellanox-SN3800-D112C8': _sn3800_family_table(),
}
139 changes: 139 additions & 0 deletions tests/platform/mellanox/test_thermal_control.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import logging
import operator
import pytest
import random
import time
from common.mellanox_data import SWITCH_MODELS
from common.plugins.loganalyzer.loganalyzer import LogAnalyzer
from common.utilities import wait_until
from thermal_control_test_helper import *
from mellanox_thermal_control_test_helper import MockerHelper, AbnormalFanMocker

# Used as the first argument of wait_until() and as the sleep (in seconds)
# after mocking the minimum table inputs.
THERMAL_CONTROL_TEST_WAIT_TIME = 65
# Used as the second argument of wait_until() (presumably its polling
# interval - confirm) and as a short extra sleep before reading PSU speeds.
THERMAL_CONTROL_TEST_CHECK_INTERVAL = 5

# File on the DUT that get_cooling_cur_state() reads with `cat`.
COOLING_CUR_STATE_PATH = '/run/hw-management/thermal/cooling_cur_state'
# test_dynamic_minimum_table is skipped when the current cooling state is
# greater than or equal to this value.
COOLING_CUR_STATE_THRESHOLD = 7
# Path template with a placeholder for the PSU number; not referenced by the
# functions visible in this file.
PSU_PRESENCE_PATH = '/run/hw-management/thermal/psu{}_status'
# Path template that get_psu_speed() formats with the 1-based PSU number.
PSU_SPEED_PATH = '/run/hw-management/thermal/psu{}_fan1_speed_get'
# Relative tolerance applied to the expected full/current PSU speed ratio.
PSU_SPEED_TOLERANCE = 0.25

# Regex template, formatted with the expected level and handed to
# LogAnalyzer.expect_regex.
LOG_EXPECT_CHANGE_MIN_COOLING_LEVEL_RE = '.*Changed minimum cooling level to {}.*'


@pytest.mark.disable_loganalyzer
def test_dynamic_minimum_table(testbed_devices, mocker_factory):
    """
    Verify the log message emitted when the minimum cooling level changes.

    For every air flow direction (visited in random order) the test mocks a
    random temperature with a random trust state, then a second random
    temperature with the opposite trust state, and each time expects the
    "Changed minimum cooling level" log line for the level computed by the mocker.
    :param testbed_devices: Fixture providing the devices of the testbed; 'dut' is used.
    :param mocker_factory: Fixture used to create the 'MinTableMocker'.
    """
    temperature_limit = 45000  # 45 C
    duthost = testbed_devices['dut']
    current_level = get_cooling_cur_state(duthost)
    if current_level >= COOLING_CUR_STATE_THRESHOLD:
        pytest.skip('The cooling level {} is higher than threshold {}.'.format(current_level, COOLING_CUR_STATE_THRESHOLD))

    table_mocker = mocker_factory(duthost, 'MinTableMocker')
    log_checker = LogAnalyzer(ansible_host=duthost, marker_prefix='thermal_control')
    log_checker.load_common_config()

    def mock_and_check(direction, temp, trusted):
        # One round: announce the case, arm the expected regex, mock the
        # inputs and wait inside the analyzer context.
        logging.info('Testing with air_flow_dir={}, temperature={}, trust_state={}'.format(direction, temp, trusted))
        expected_level = table_mocker.get_expect_cooling_level(direction, temp, trusted)
        log_checker.expect_regex = [LOG_EXPECT_CHANGE_MIN_COOLING_LEVEL_RE.format(expected_level)]
        with log_checker:
            table_mocker.mock_min_table(direction, temp, trusted)
            time.sleep(THERMAL_CONTROL_TEST_WAIT_TIME)

    remaining_dirs = ['p2c', 'c2p', 'unk']
    while remaining_dirs:
        # Draw order (direction, temperature, trust, temperature) is kept as-is.
        picked = random.randint(0, len(remaining_dirs) - 1)
        direction = remaining_dirs.pop(picked)
        first_temp = random.randint(0, temperature_limit)
        trusted = bool(random.randint(0, 1))
        mock_and_check(direction, first_temp, trusted)

        second_temp = random.randint(0, temperature_limit)
        mock_and_check(direction, second_temp, not trusted)


@pytest.mark.disable_loganalyzer
def test_set_psu_fan_speed(testbed_devices, mocker_factory):
    """
    Verify that PSU fan speed follows the cooling level.

    The test mocks a FAN as absent and waits for cooling state 10, records the
    PSU fan speeds, mocks the FAN as present again and waits for the cooling
    state to leave 10, then compares the full/current speed ratio of every PSU
    with 10 / cooling level (level clamped to at least 6) within
    PSU_SPEED_TOLERANCE.
    :param testbed_devices: Fixture providing the devices of the testbed; 'dut' is used.
    :param mocker_factory: Fixture used to create the 'SingleFanMocker'.
    """
    duthost = testbed_devices['dut']
    sku = duthost.facts["hwsku"]
    psu_count = SWITCH_MODELS[sku]['psus']['number']
    if not SWITCH_MODELS[sku]['psus']['hot_swappable']:
        pytest.skip('The SKU {} does not support this test case.'.format(sku))

    def read_psu_speeds():
        # Speed of every PSU, in PSU order; entries may be None.
        return [get_psu_speed(duthost, psu_index) for psu_index in range(psu_count)]

    def cooling_state_reaches(compare):
        # Poll until compare(current cooling state, 10) holds or the wait time is over.
        return wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, check_cooling_cur_state, duthost, 10, compare)

    logging.info('Create mocker, it may take a few seconds...')
    fan_mocker = mocker_factory(duthost, 'SingleFanMocker')
    logging.info('Mock FAN absence...')
    fan_mocker.mock_absence()
    assert cooling_state_reaches(operator.eq), \
        'Current cooling state is {}'.format(get_cooling_cur_state(duthost))

    logging.info('Wait {} seconds for the policy to take effect...'.format(THERMAL_CONTROL_TEST_CHECK_INTERVAL))
    time.sleep(THERMAL_CONTROL_TEST_CHECK_INTERVAL)
    full_speeds = read_psu_speeds()

    logging.info('Full speed={}'.format(full_speeds))
    logging.info('Mock FAN presence...')
    fan_mocker.mock_presence()
    assert cooling_state_reaches(operator.ne), \
        'Current cooling state is {}'.format(get_cooling_cur_state(duthost))
    logging.info('Wait {} seconds for the policy to take effect...'.format(THERMAL_CONTROL_TEST_CHECK_INTERVAL))
    time.sleep(THERMAL_CONTROL_TEST_CHECK_INTERVAL)
    level = get_cooling_cur_state(duthost)
    logging.info('Cooling level changed to {}'.format(level))
    current_speeds = read_psu_speeds()

    logging.info('Current speed={}'.format(current_speeds))
    # Levels below 6 are treated as 6 when computing the expected ratio.
    expected_ratio = float(10) / max(level, 6)
    for full_speed, current_speed in zip(full_speeds, current_speeds):
        # Skip PSUs with a missing (None) or zero reading on either side.
        if not full_speed or not current_speed:
            continue

        actual_ratio = float(full_speed) / current_speed
        if expected_ratio > actual_ratio:
            assert actual_ratio > expected_ratio * (1 - PSU_SPEED_TOLERANCE)
        elif expected_ratio < actual_ratio:
            assert actual_ratio < expected_ratio * (1 + PSU_SPEED_TOLERANCE)


def get_psu_speed(dut, index):
    """
    Read the fan speed of one PSU from the DUT.
    :param dut: DUT object providing the `stat` and `command` modules.
    :param index: 0-based PSU index; the speed file name uses index + 1.
    :return: The speed as an int, or None if the speed file does not exist.
    :raises AssertionError: If the file content cannot be converted to an int.
    """
    psu_speed_path = PSU_SPEED_PATH.format(index + 1)
    stat_result = dut.stat(path=psu_speed_path)
    if not stat_result:
        return None

    # The stat module reports a missing path as {'stat': {'exists': False}},
    # which is a non-empty (truthy) dict, so the 'exists' flag has to be
    # checked explicitly. Results of an unexpected shape fall through to the
    # read below, as they did before.
    if isinstance(stat_result, dict) and not stat_result.get('stat', {}).get('exists', True):
        return None

    cmd_output = dut.command('cat {}'.format(psu_speed_path))
    try:
        return int(cmd_output['stdout'])
    except (KeyError, TypeError, ValueError) as e:
        # Raised explicitly so the failure is not lost when asserts are disabled.
        raise AssertionError('Bad content in {} - {}'.format(psu_speed_path, e))


def get_cooling_cur_state(dut):
    """
    Read the current cooling state from the DUT.
    :param dut: DUT object providing the `command` module.
    :return: Content of COOLING_CUR_STATE_PATH converted to int.
    """
    read_cmd = 'cat {}'.format(COOLING_CUR_STATE_PATH)
    result = dut.command(read_cmd)
    try:
        state = int(result['stdout'])
    except Exception as err:
        assert False, 'Bad content in {} - {}'.format(COOLING_CUR_STATE_PATH, err)
    else:
        return state


def check_cooling_cur_state(dut, expect_value, op):
    """
    Compare the current cooling state of the DUT with an expected value.
    :param dut: DUT object.
    :param expect_value: Value to compare the current cooling state with.
    :param op: Two-argument callable invoked as op(actual, expected), e.g. operator.eq.
    :return: Whatever op returns.
    """
    return op(get_cooling_cur_state(dut), expect_value)
2 changes: 1 addition & 1 deletion tests/platform/thermal_control_test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def restart_thermal_control_daemon(dut):
assert output["rc"] == 0, "Run command '%s' failed" % kill_thermalctld_cmd

# make sure thermalctld has restarted
max_wait_time = 5
max_wait_time = 30
while max_wait_time > 0:
max_wait_time -= 1
output = dut.command(find_thermalctld_pid_cmd)
Expand Down

0 comments on commit 7c988f7

Please sign in to comment.