@mocker('MinTableMocker')
class MinTableMocker(object):
    """
    Mocker for the dynamic minimum cooling level ("minimum table") test.

    It writes mocked values for the fan ambient sensor, the port ambient
    sensor and the module temperature fault flag through MockerHelper, and
    computes the cooling level expected for a given combination from
    MINIMUM_TABLE.
    """
    # Names handed to MockerHelper.mock_thermal_value().
    FAN_AMB_PATH = 'fan_amb'
    PORT_AMB_PATH = 'port_amb'
    TRUST_PATH = 'module1_temp_fault'

    def __init__(self, dut):
        """
        Constructor of MinTableMocker.
        :param dut: DUT object representing a SONiC switch under test.
        """
        self.mock_helper = MockerHelper(dut)

    def get_expect_cooling_level(self, air_flow_dir, temperature, trust_state):
        """
        Look up the expected minimum cooling level in MINIMUM_TABLE.
        :param air_flow_dir: Air flow direction key prefix: 'p2c', 'c2p' or 'unk'.
        :param temperature: Temperature in the same unit that is written to the
                            mocked sensors (divided by 1000 before the lookup).
        :param trust_state: True to use the '<dir>_trust' row, False for '<dir>_untrust'.
        :return: Table value minus 10, or None if no range contains the temperature.
        """
        hwsku = self.mock_helper.dut.facts["hwsku"]
        row_key = '{}_{}'.format(air_flow_dir, 'trust' if trust_state else 'untrust')
        row = MINIMUM_TABLE[hwsku][row_key]

        # Floor division keeps the value an integer on both Python 2 and
        # Python 3. With true division 40500 would become 40.5 and fall into
        # the gap between the integer ranges "-127:40" and "41:120".
        temperature = temperature // 1000
        for range_str, cooling_level in row.items():
            min_temp, max_temp = (int(bound) for bound in range_str.split(':'))
            if min_temp <= temperature <= max_temp:
                # Table values are stored with an offset of 10.
                return cooling_level - 10

        return None

    def mock_min_table(self, air_flow_dir, temperature, trust_state):
        """
        Mock the sensor values that drive the minimum table lookup.
        :param air_flow_dir: 'p2c' makes the fan ambient the higher reading,
                             'c2p' makes the port ambient the higher reading,
                             anything else sets both to the same value.
        :param temperature: Value written to the higher (or both) sensor(s).
        :param trust_state: True writes '0' to the fault file, False writes '1'.
        :return:
        """
        trust_value = '0' if trust_state else '1'
        if air_flow_dir == 'p2c':
            fan_temp = temperature
            port_temp = temperature - 100
        elif air_flow_dir == 'c2p':
            fan_temp = temperature - 100
            port_temp = temperature
        else:
            fan_temp = temperature
            port_temp = temperature

        self.mock_helper.mock_thermal_value(self.FAN_AMB_PATH, str(fan_temp))
        self.mock_helper.mock_thermal_value(self.PORT_AMB_PATH, str(port_temp))
        self.mock_helper.mock_thermal_value(self.TRUST_PATH, trust_value)

    def deinit(self):
        """
        Destructor of MinTableMocker.
        :return:
        """
        self.mock_helper.deinit()
def _sn2700_family_table():
    """Build the table shared by the SN2700 and SN2410 SKUs (new dicts on every call)."""
    return {
        'p2c_trust': {'-127:40': 13, '41:120': 15},
        'p2c_untrust': {'-127:25': 13, '26:30': 14, '31:35': 15, '36:120': 16},
        'c2p_trust': {'-127:20': 13, '21:25': 14, '26:30': 15, '31:120': 16},
        'c2p_untrust': {'-127:20': 13, '21:25': 14, '26:30': 15, '31:120': 16},
        'unk_trust': {'-127:20': 13, '21:25': 14, '26:30': 15, '31:120': 16},
        'unk_untrust': {'-127:20': 13, '21:25': 14, '26:30': 15, '31:120': 16},
    }


def _sn3800_family_table():
    """Build the table shared by the SN3800 SKUs (new dicts on every call)."""
    return {
        'p2c_trust': {'-127:35': 12, '36:120': 13},
        'p2c_untrust': {'-127:0': 12, '1:10': 13, '11:15': 14, '16:20': 15, '21:35': 16, '36:120': 17},
        'c2p_trust': {'-127:30': 12, '31:40': 13, '41:120': 14},
        'c2p_untrust': {'-127:20': 12, '21:30': 13, '31:35': 14, '36:40': 15, '41:120': 16},
        'unk_trust': {'-127:30': 12, '31:40': 13, '41:120': 14},
        'unk_untrust': {'-127:0': 12, '1:10': 13, '11:15': 14, '16:20': 15, '21:35': 16, '36:120': 17},
    }


# Per-hwsku lookup data.
# First level key:  hwsku name.
# Second level key: '<air flow direction>_<trust|untrust>', direction being
#                   'p2c', 'c2p' or 'unk'.
# Third level:      'min:max' inclusive integer range mapped to an integer value.
MINIMUM_TABLE = {
    'ACS-MSN2700': _sn2700_family_table(),
    'LS-SN2700': _sn2700_family_table(),
    'ACS-MSN2740': {
        'p2c_trust': {'-127:120': 13},
        'p2c_untrust': {'-127:35': 13, '36:40': 14, '41:120': 15},
        'c2p_trust': {'-127:120': 13},
        'c2p_untrust': {'-127:15': 13, '16:30': 14, '31:35': 15, '36:120': 17},
        'unk_trust': {'-127:120': 13},
        'unk_untrust': {'-127:15': 13, '16:30': 14, '31:35': 15, '36:120': 17},
    },
    'ACS-MSN2410': _sn2700_family_table(),
    'Mellanox-SN2700': _sn2700_family_table(),
    'Mellanox-SN2700-D48C8': _sn2700_family_table(),
    'ACS-MSN2100': {
        'p2c_trust': {'-127:120': 12},
        'p2c_untrust': {'-127:15': 12, '16:25': 13, '26:30': 14, '31:35': 15, '36:120': 16},
        'c2p_trust': {'-127:40': 12, '41:120': 13},
        'c2p_untrust': {'-127:40': 12, '41:120': 13},
        'unk_trust': {'-127:40': 12, '41:120': 13},
        'unk_untrust': {'-127:15': 12, '16:25': 13, '26:30': 14, '31:35': 15, '36:120': 16},
    },
    'ACS-MSN2010': {
        'p2c_trust': {'-127:120': 12},
        'p2c_untrust': {'-127:15': 12, '16:20': 13, '21:30': 14, '31:35': 15, '36:120': 16},
        'c2p_trust': {'-127:120': 12},
        'c2p_untrust': {'-127:20': 12, '21:25': 13, '26:30': 14, '31:35': 15, '36:120': 16},
        'unk_trust': {'-127:120': 12},
        'unk_untrust': {'-127:15': 12, '16:20': 13, '21:30': 14, '31:35': 15, '36:120': 16},
    },
    'ACS-MSN3700': {
        'p2c_trust': {'-127:25': 12, '26:40': 13, '41:120': 14},
        'p2c_untrust': {'-127:15': 12, '16:30': 13, '31:35': 14, '36:40': 15, '41:120': 16},
        'c2p_trust': {'-127:25': 12, '26:40': 13, '41:120': 14},
        'c2p_untrust': {'-127:25': 12, '26:40': 13, '41:120': 14},
        'unk_trust': {'-127:25': 12, '26:40': 13, '41:120': 14},
        'unk_untrust': {'-127:15': 12, '16:30': 13, '31:35': 14, '36:40': 15, '41:120': 16},
    },
    'ACS-MSN3800': _sn3800_family_table(),
    'Mellanox-SN3800-D112C8': _sn3800_family_table(),
}
@pytest.mark.disable_loganalyzer
def test_dynamic_minimum_table(testbed_devices, mocker_factory):
    """
    Verify that the minimum cooling level follows the mocked ambient conditions.

    Every air flow direction is visited once, in random order. For each one a
    random temperature is mocked first with a random trust state and then,
    with a newly drawn temperature, with the opposite trust state. After each
    mock the syslog is expected to contain the 'Changed minimum cooling level'
    message carrying the level computed by the mocker.
    :param testbed_devices: Fixture providing the DUT under key 'dut'.
    :param mocker_factory: Fixture creating mocker objects by name.
    """
    dut = testbed_devices['dut']
    cooling_cur_state = get_cooling_cur_state(dut)
    if cooling_cur_state >= COOLING_CUR_STATE_THRESHOLD:
        pytest.skip('The cooling level {} is higher than threshold {}.'.format(cooling_cur_state, COOLING_CUR_STATE_THRESHOLD))

    max_temperature = 45000  # 45 C
    mocker = mocker_factory(dut, 'MinTableMocker')
    loganalyzer = LogAnalyzer(ansible_host=dut, marker_prefix='thermal_control')
    loganalyzer.load_common_config()

    def run_case(direction, temperature, trusted):
        # Mock one combination and wait inside the log analyzer context, so
        # the expected message must show up before the context exits.
        logging.info('Testing with air_flow_dir={}, temperature={}, trust_state={}'.format(direction, temperature, trusted))
        expected_level = mocker.get_expect_cooling_level(direction, temperature, trusted)
        loganalyzer.expect_regex = [LOG_EXPECT_CHANGE_MIN_COOLING_LEVEL_RE.format(expected_level)]
        with loganalyzer:
            mocker.mock_min_table(direction, temperature, trusted)
            time.sleep(THERMAL_CONTROL_TEST_WAIT_TIME)

    pending_dirs = ['p2c', 'c2p', 'unk']
    while pending_dirs:
        # Draw the next direction at random without repeating any.
        direction = pending_dirs.pop(random.randint(0, len(pending_dirs) - 1))
        temperature = random.randint(0, max_temperature)
        trusted = bool(random.randint(0, 1))
        run_case(direction, temperature, trusted)
        run_case(direction, random.randint(0, max_temperature), not trusted)
@pytest.mark.disable_loganalyzer
def test_set_psu_fan_speed(testbed_devices, mocker_factory):
    """
    Verify that the PSU fan speed changes together with the cooling level.

    A fan is mocked absent and the cooling level is expected to become 10; the
    PSU fan speeds read at that point are taken as "full speed". The fan is
    then mocked present again, the cooling level is expected to leave 10, and
    the ratio full speed / current speed is compared with 10 / cooling level
    within PSU_SPEED_TOLERANCE.
    :param testbed_devices: Fixture providing the DUT under key 'dut'.
    :param mocker_factory: Fixture creating mocker objects by name.
    """
    dut = testbed_devices['dut']
    hwsku = dut.facts["hwsku"]
    psu_num = SWITCH_MODELS[hwsku]['psus']['number']
    hot_swappable = SWITCH_MODELS[hwsku]['psus']['hot_swappable']
    if not hot_swappable:
        pytest.skip('The SKU {} does not support this test case.'.format(hwsku))

    logging.info('Create mocker, it may take a few seconds...')
    single_fan_mocker = mocker_factory(dut, 'SingleFanMocker')
    logging.info('Mock FAN absence...')
    single_fan_mocker.mock_absence()
    assert wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL,
                      check_cooling_cur_state, dut, 10, operator.eq), \
        'Current cooling state is {}'.format(get_cooling_cur_state(dut))

    logging.info('Wait {} seconds for the policy to take effect...'.format(THERMAL_CONTROL_TEST_CHECK_INTERVAL))
    time.sleep(THERMAL_CONTROL_TEST_CHECK_INTERVAL)
    full_speeds = [get_psu_speed(dut, index) for index in range(psu_num)]
    logging.info('Full speed={}'.format(full_speeds))

    logging.info('Mock FAN presence...')
    single_fan_mocker.mock_presence()
    assert wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL,
                      check_cooling_cur_state, dut, 10, operator.ne), \
        'Current cooling state is {}'.format(get_cooling_cur_state(dut))
    logging.info('Wait {} seconds for the policy to take effect...'.format(THERMAL_CONTROL_TEST_CHECK_INTERVAL))
    time.sleep(THERMAL_CONTROL_TEST_CHECK_INTERVAL)
    cooling_cur_state = get_cooling_cur_state(dut)
    logging.info('Cooling level changed to {}'.format(cooling_cur_state))
    current_speeds = [get_psu_speed(dut, index) for index in range(psu_num)]
    logging.info('Current speed={}'.format(current_speeds))

    # Levels below 6 are treated as 6 when computing the expected ratio.
    # NOTE(review): presumably the PSU fan is never driven below level 6 - confirm.
    cooling_cur_state = max(cooling_cur_state, 6)
    expect_multiple = float(10) / cooling_cur_state
    for psu_index, (full_speed, current_speed) in enumerate(zip(full_speeds, current_speeds), 1):
        # Skip PSUs without a usable reading (None or 0) in either sample.
        if not full_speed or not current_speed:
            continue

        actual_multiple = float(full_speed) / current_speed
        failure_msg = 'PSU {}: full/current speed ratio {} is too far from expected {}'.format(
            psu_index, actual_multiple, expect_multiple)
        if expect_multiple > actual_multiple:
            assert actual_multiple > expect_multiple * (1 - PSU_SPEED_TOLERANCE), failure_msg
        elif expect_multiple < actual_multiple:
            assert actual_multiple < expect_multiple * (1 + PSU_SPEED_TOLERANCE), failure_msg


def get_psu_speed(dut, index):
    """
    Read the fan speed of one PSU from the DUT.
    :param dut: DUT object representing a SONiC switch under test.
    :param index: Zero based PSU index; the file name uses index + 1.
    :return: Speed as int, or None if the speed file does not exist.
    :raises AssertionError: If the file content is not an integer.
    """
    psu_speed_path = PSU_SPEED_PATH.format(index + 1)
    # The Ansible stat module returns a result dict that is never empty, so
    # the existence flag has to be read explicitly from result['stat'].
    file_stat = dut.stat(path=psu_speed_path)
    if not file_stat['stat']['exists']:
        return None

    cmd_output = dut.command('cat {}'.format(psu_speed_path))
    try:
        return int(cmd_output['stdout'])
    except (KeyError, TypeError, ValueError) as e:
        # Raised explicitly instead of 'assert False' so it survives python -O.
        raise AssertionError('Bad content in {} - {}'.format(psu_speed_path, e))


def get_cooling_cur_state(dut):
    """
    Read the current cooling level from COOLING_CUR_STATE_PATH on the DUT.
    :param dut: DUT object representing a SONiC switch under test.
    :return: Current cooling level as int.
    :raises AssertionError: If the file content is not an integer.
    """
    cmd_output = dut.command('cat {}'.format(COOLING_CUR_STATE_PATH))
    try:
        return int(cmd_output['stdout'])
    except (KeyError, TypeError, ValueError) as e:
        # Raised explicitly instead of 'assert False' so it survives python -O.
        raise AssertionError('Bad content in {} - {}'.format(COOLING_CUR_STATE_PATH, e))


def check_cooling_cur_state(dut, expect_value, op):
    """
    Compare the current cooling level with an expected value.
    :param dut: DUT object representing a SONiC switch under test.
    :param expect_value: Value to compare the current cooling level with.
    :param op: Binary predicate called as op(actual, expected), e.g. operator.eq.
    :return: Result of op(actual, expected).
    """
    return op(get_cooling_cur_state(dut), expect_value)