From d864b18cda068630b42fb00161b2415d9e4b100e Mon Sep 17 00:00:00 2001 From: junchao Date: Mon, 12 Apr 2021 16:22:12 +0800 Subject: [PATCH 1/9] Add bitmap support for SFP error event --- sonic-xcvrd/xcvrd/xcvrd.py | 49 +++++++++++-------- .../xcvrd/xcvrd_utilities/y_cable_helper.py | 30 ++++++------ 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 0ae0a0cfd..1801ed646 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -47,13 +47,14 @@ SFP_STATUS_REMOVED = '0' SFP_STATUS_INSERTED = '1' -# SFP error code enum, new elements can be added to the enum if new errors need to be supported. -SFP_STATUS_ERR_ENUM = Enum('SFP_STATUS_ERR_ENUM', ['SFP_STATUS_ERR_I2C_STUCK', 'SFP_STATUS_ERR_BAD_EEPROM', - 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', 'SFP_STATUS_ERR_HIGH_TEMP', - 'SFP_STATUS_ERR_BAD_CABLE'], start=2) - -# Convert the error code to string and store them in a set for convenience -errors_block_eeprom_reading = set(str(error_code.value) for error_code in SFP_STATUS_ERR_ENUM) +# SFP error code dictinary, new elements can be added if new errors need to be supported. +SFP_STATUS_ERR_DICT = { + 2: 'SFP_STATUS_ERR_I2C_STUCK', + 4: 'SFP_STATUS_ERR_BAD_EEPROM', + 8: 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', + 16: 'SFP_STATUS_ERR_HIGH_TEMP', + 32: 'SFP_STATUS_ERR_BAD_CABLE' +} EVENT_ON_ALL_SFP = '-1' # events definition @@ -771,9 +772,23 @@ def waiting_time_compensation_with_sleep(time_start, time_to_wait): # Update port SFP status table on receiving SFP change event -def update_port_transceiver_status_table(logical_port_name, status_tbl, status): - fvs = swsscommon.FieldValuePairs([('status', status)]) - status_tbl.set(logical_port_name, fvs) +def update_port_transceiver_status_table(logical_port_name, status_tbl, status, has_error=False): + if not has_error: + fvs = swsscommon.FieldValuePairs([('status', status), ('error', 'N/A')]) + status_tbl.set(logical_port_name, fvs) + else: + error_list = [] + int_status = int(status) + for error_code, error_msg in SFP_STATUS_ERR_DICT.items(): + if error_code & int_status: + error_list.append(error_msg) + if error_list: + fvs = swsscommon.FieldValuePairs([('status', status), ('error', '|'.join(error_list))]) + status_tbl.set(logical_port_name, fvs) + else: + # SFP return unkown event, just ignore for now. + helper_logger.log_warning("Got unknown event {}, ignored".format(status)) + # Delete port from SFP status table @@ -788,10 +803,7 @@ def detect_port_in_error_status(logical_port_name, status_tbl): rec, fvp = status_tbl.get(logical_port_name) if rec: status_dict = dict(fvp) - if status_dict['status'] in errors_block_eeprom_reading: - return True - else: - return False + return 'error' in status_dict and status_dict['error'] != 'N/A' else: return False @@ -1097,23 +1109,18 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): logical_port, status_tbl[asic_index], SFP_STATUS_REMOVED) helper_logger.log_info("receive plug out and pdate port sfp status table.") del_port_sfp_dom_info_from_db(logical_port, int_tbl[asic_index], dom_tbl[asic_index]) - elif value in errors_block_eeprom_reading: + else: helper_logger.log_info("Got SFP Error event") # Add port to error table to stop accessing eeprom of it # If the port already in the error table, the stored error code will # be updated to the new one. - update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value) + update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value, True) helper_logger.log_info("receive error update port sfp status table.") # In this case EEPROM is not accessible, so remove the DOM info # since it will be outdated if long time no update. # but will keep the interface info in the DB since it static. del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index]) - else: - # SFP return unkown event, just ignore for now. - helper_logger.log_warning("Got unknown event {}, ignored".format(value)) - continue - # Since ports could be connected to a mux cable, if there is a change event process the change for being on a Y cable Port y_cable_helper.change_ports_status_for_y_cable_change_event( port_dict, y_cable_presence, stopping_event) diff --git a/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py b/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py index a7eb9e355..0fdcf7e43 100644 --- a/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py +++ b/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py @@ -26,20 +26,13 @@ SFP_STATUS_REMOVED = '0' SFP_STATUS_INSERTED = '1' -# SFP error codes, stored as strings. Can add more as needed. -SFP_STATUS_ERR_I2C_STUCK = '2' -SFP_STATUS_ERR_BAD_EEPROM = '3' -SFP_STATUS_ERR_UNSUPPORTED_CABLE = '4' -SFP_STATUS_ERR_HIGH_TEMP = '5' -SFP_STATUS_ERR_BAD_CABLE = '6' - -# Store the error codes in a set for convenience -errors_block_eeprom_reading = { - SFP_STATUS_ERR_I2C_STUCK, - SFP_STATUS_ERR_BAD_EEPROM, - SFP_STATUS_ERR_UNSUPPORTED_CABLE, - SFP_STATUS_ERR_HIGH_TEMP, - SFP_STATUS_ERR_BAD_CABLE +# SFP error code dictinary, new elements can be added if new errors need to be supported. +SFP_STATUS_ERR_DICT = { + 2: 'SFP_STATUS_ERR_I2C_STUCK', + 4: 'SFP_STATUS_ERR_BAD_EEPROM', + 8: 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', + 16: 'SFP_STATUS_ERR_HIGH_TEMP', + 32: 'SFP_STATUS_ERR_BAD_CABLE' } Y_CABLE_STATUS_NO_TOR_ACTIVE = 0 @@ -439,7 +432,7 @@ def change_ports_status_for_y_cable_change_event(port_dict, y_cable_presence, st helper_logger.log_info("Got SFP inserted event") check_identifier_presence_and_update_mux_table_entry( state_db, port_tbl, y_cable_tbl, static_tbl, mux_tbl, asic_index, logical_port_name, y_cable_presence) - elif value == SFP_STATUS_REMOVED or value in errors_block_eeprom_reading: + elif value == SFP_STATUS_REMOVED or is_error_sfp_status(value): check_identifier_presence_and_delete_mux_table_entry( state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event) @@ -990,6 +983,13 @@ def post_mux_info_to_db(is_warm_start, stop_event=threading.Event()): post_port_mux_info_to_db(logical_port_name, mux_tbl[asic_index]) +def is_error_sfp_status(status): + int_status = int(status) + for error_code in SFP_STATUS_ERR_DICT.keys(): + if int_status & error_code: + return True + return False + # Thread wrapper class to update y_cable status periodically class YCableTableUpdateTask(object): def __init__(self): From 84f2fa3c22579f275af224bc6b67839a406969a1 Mon Sep 17 00:00:00 2001 From: junchao Date: Wed, 14 Apr 2021 11:43:20 +0800 Subject: [PATCH 2/9] Add unit test for sfp error bitmap --- sonic-xcvrd/tests/test_xcvrd.py | 60 +++++++++++++++++++++++++++++++++ sonic-xcvrd/xcvrd/xcvrd.py | 2 +- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index a4d64b68d..2c50380d6 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -318,3 +318,63 @@ def test_get_media_settings_key(self): result = get_media_settings_key(0, xcvr_info_dict) assert result == ['MOLEX-1064141421', 'QSFP+'] # TODO: Ensure that error message was logged + + def test_update_port_transceiver_status_table(self): + logical_port_name = "Ethernet0" + status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE) + update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_INSERTED) + entry = status_tbl.get(logical_port_name) + print(entry[1]) + print(entry[0][0]) + assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED + assert status_tbl.get(logical_port_name)[1][1] == 'N/A' + + update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_REMOVED) + assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_REMOVED + assert status_tbl.get(logical_port_name)[1][1] == 'N/A' + + error_dict = { + '3': 'SFP_STATUS_ERR_I2C_STUCK', + '5': 'SFP_STATUS_ERR_BAD_EEPROM', + '9': 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', + '17': 'SFP_STATUS_ERR_HIGH_TEMP', + '33': 'SFP_STATUS_ERR_BAD_CABLE' + } + + # Test single errors + for error_value, error_msg in error_dict.items(): + update_port_transceiver_status_table(logical_port_name, status_tbl, error_value, True) + assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED + assert status_tbl.get(logical_port_name)[1][1] == error_msg + + # Test multiple errors + update_port_transceiver_status_table(logical_port_name, status_tbl, '63', True) + assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED + error = status_tbl.get(logical_port_name)[1][1] + for error_msg in error_dict.values(): + assert error_msg in error + + # Test unsupported errors + status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE) + update_port_transceiver_status_table(logical_port_name, status_tbl, '1024', True) + assert status_tbl.get(logical_port_name) is None + + def test_detect_port_in_error_status(self): + class MockTable: + def get(self, key): + pass + + status_tbl = MockTable() + status_tbl.get = MagicMock(return_value=(True, {'error': 'N/A'})) + assert not detect_port_in_error_status(None, status_tbl) + + status_tbl.get = MagicMock(return_value=(True, {'error': 'something'})) + assert detect_port_in_error_status(None, status_tbl) + + def test_is_error_sfp_status(self): + error_values = ['3', '5', '9', '17', '33'] + for error_value in error_values: + assert is_error_sfp_status(error_value) + + assert not is_error_sfp_status(SFP_STATUS_INSERTED) + assert not is_error_sfp_status(SFP_STATUS_REMOVED) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 1801ed646..b1e1e77a6 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -783,7 +783,7 @@ def update_port_transceiver_status_table(logical_port_name, status_tbl, status, if error_code & int_status: error_list.append(error_msg) if error_list: - fvs = swsscommon.FieldValuePairs([('status', status), ('error', '|'.join(error_list))]) + fvs = swsscommon.FieldValuePairs([('status', str(int_status & 1)), ('error', '|'.join(error_list))]) status_tbl.set(logical_port_name, fvs) else: # SFP return unkown event, just ignore for now. From 20d8f27aa8118301fef27ee615a407b18ba21998 Mon Sep 17 00:00:00 2001 From: junchao Date: Mon, 26 Apr 2021 11:03:50 +0800 Subject: [PATCH 3/9] Move common code to sfp_status_helper.py --- sonic-xcvrd/tests/test_xcvrd.py | 28 ++++++----- sonic-xcvrd/xcvrd/xcvrd.py | 49 ++++++------------- .../xcvrd_utilities/sfp_status_helper.py | 35 +++++++++++++ .../xcvrd/xcvrd_utilities/y_cable_helper.py | 26 ++-------- 4 files changed, 67 insertions(+), 71 deletions(-) create mode 100644 sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 2c50380d6..7105fbb36 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -24,13 +24,15 @@ test_path = os.path.dirname(os.path.abspath(__file__)) modules_path = os.path.dirname(test_path) scripts_path = os.path.join(modules_path, "xcvrd") -helper_file_path = os.path.join(scripts_path, "xcvrd_utilities"+"/y_cable_helper.py") +#helper_file_path = os.path.join(scripts_path, "xcvrd_utilities"+"/y_cable_helper.py") sys.path.insert(0, modules_path) os.environ["XCVRD_UNIT_TESTING"] = "1" -load_source('y_cable_helper', scripts_path + '/xcvrd_utilities/y_cable_helper.py') -from y_cable_helper import * +#load_source('y_cable_helper', scripts_path + '/xcvrd_utilities/y_cable_helper.py') +#load_source('sfp_status_helper', scripts_path + '/xcvrd_utilities/sfp_status_helper.py') +from xcvrd.xcvrd_utilities.y_cable_helper import * from xcvrd.xcvrd import * +from xcvrd.xcvrd_utilities.sfp_status_helper import * class TestXcvrdScript(object): @@ -219,9 +221,9 @@ def test_init_port_sfp_status_tbl(self): init_port_sfp_status_tbl(stop_event) @patch('xcvrd.xcvrd_utilities.y_cable_helper.y_cable_platform_sfputil', MagicMock(return_value=[0])) - @patch('y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) - @patch('y_cable_helper._wrapper_get_presence', MagicMock(return_value=True)) - @patch('y_cable_helper.get_muxcable_info', MagicMock(return_value={'tor_active': 'self', + @patch('xcvrd.xcvrd_utilities.y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) + @patch('xcvrd.xcvrd_utilities.y_cable_helper._wrapper_get_presence', MagicMock(return_value=True)) + @patch('xcvrd.xcvrd_utilities.y_cable_helper.get_muxcable_info', MagicMock(return_value={'tor_active': 'self', 'mux_direction': 'self', 'manual_switch_count': '7', 'auto_switch_count': '71', @@ -258,9 +260,9 @@ def test_post_port_mux_info_to_db(self): assert(rc != -1) @patch('xcvrd.xcvrd_utilities.y_cable_helper.y_cable_platform_sfputil', MagicMock(return_value=[0])) - @patch('y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) - @patch('y_cable_helper._wrapper_get_presence', MagicMock(return_value=True)) - @patch('y_cable_helper.get_muxcable_static_info', MagicMock(return_value={'read_side': 'self', + @patch('xcvrd.xcvrd_utilities.y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) + @patch('xcvrd.xcvrd_utilities.y_cable_helper._wrapper_get_presence', MagicMock(return_value=True)) + @patch('xcvrd.xcvrd_utilities.y_cable_helper.get_muxcable_static_info', MagicMock(return_value={'read_side': 'self', 'nic_lane1_precursor1': '1', 'nic_lane1_precursor2': '-7', 'nic_lane1_maincursor': '-1', @@ -368,13 +370,13 @@ def get(self, key): status_tbl.get = MagicMock(return_value=(True, {'error': 'N/A'})) assert not detect_port_in_error_status(None, status_tbl) - status_tbl.get = MagicMock(return_value=(True, {'error': 'something'})) + status_tbl.get = MagicMock(return_value=(True, {'error': 'SFP_STATUS_ERR_I2C_STUCK'})) assert detect_port_in_error_status(None, status_tbl) def test_is_error_sfp_status(self): error_values = ['3', '5', '9', '17', '33'] for error_value in error_values: - assert is_error_sfp_status(error_value) + assert is_error_block_eeprom_reading(error_value) - assert not is_error_sfp_status(SFP_STATUS_INSERTED) - assert not is_error_sfp_status(SFP_STATUS_REMOVED) + assert not is_error_block_eeprom_reading(SFP_STATUS_INSERTED) + assert not is_error_block_eeprom_reading(SFP_STATUS_REMOVED) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index b1e1e77a6..86f1fdf8d 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -20,6 +20,7 @@ from sonic_py_common import multi_asic from swsscommon import swsscommon + from .xcvrd_utilities import sfp_status_helper from .xcvrd_utilities import y_cable_helper except ImportError as e: raise ImportError(str(e) + " - required module not found") @@ -43,19 +44,6 @@ TIME_FOR_SFP_READY_SECS = 1 XCVRD_MAIN_THREAD_SLEEP_SECS = 60 -# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase -SFP_STATUS_REMOVED = '0' -SFP_STATUS_INSERTED = '1' - -# SFP error code dictinary, new elements can be added if new errors need to be supported. -SFP_STATUS_ERR_DICT = { - 2: 'SFP_STATUS_ERR_I2C_STUCK', - 4: 'SFP_STATUS_ERR_BAD_EEPROM', - 8: 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', - 16: 'SFP_STATUS_ERR_HIGH_TEMP', - 32: 'SFP_STATUS_ERR_BAD_CABLE' -} - EVENT_ON_ALL_SFP = '-1' # events definition SYSTEM_NOT_READY = 'system_not_ready' @@ -541,7 +529,7 @@ def recover_missing_sfp_table_entries(sfp_util, int_tbl, status_tbl, stop_event) continue keys = int_tbl[asic_index].getKeys() - if logical_port_name not in keys and not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): + if logical_port_name not in keys and not sfp_status_helper.detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): post_port_sfp_info_to_db(logical_port_name, int_tbl[asic_index], transceiver_dict, stop_event) @@ -779,7 +767,7 @@ def update_port_transceiver_status_table(logical_port_name, status_tbl, status, else: error_list = [] int_status = int(status) - for error_code, error_msg in SFP_STATUS_ERR_DICT.items(): + for error_code, error_msg in sfp_status_helper.SFP_STATUS_ERR_DICT.items(): if error_code & int_status: error_list.append(error_msg) if error_list: @@ -796,17 +784,6 @@ def update_port_transceiver_status_table(logical_port_name, status_tbl, status, def delete_port_from_status_table(logical_port_name, status_tbl): status_tbl._del(logical_port_name) -# Check whether port in error status - - -def detect_port_in_error_status(logical_port_name, status_tbl): - rec, fvp = status_tbl.get(logical_port_name) - if rec: - status_dict = dict(fvp) - return 'error' in status_dict and status_dict['error'] != 'N/A' - else: - return False - # Init TRANSCEIVER_STATUS table @@ -836,16 +813,16 @@ def init_port_sfp_status_tbl(stop_event=threading.Event()): physical_port_list = logical_port_name_to_physical_port_list(logical_port_name) if physical_port_list is None: helper_logger.log_error("No physical ports found for logical port '{}'".format(logical_port_name)) - update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_REMOVED) + update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED) for physical_port in physical_port_list: if stop_event.is_set(): break if not _wrapper_get_presence(physical_port): - update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_REMOVED) + update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED) else: - update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_INSERTED) + update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_INSERTED) # # Helper classes =============================================================== @@ -884,7 +861,7 @@ def task_worker(self, y_cable_presence): logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue - if not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): + if not sfp_status_helper.detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): post_port_dom_info_to_db(logical_port_name, dom_tbl[asic_index], self.task_stopping_event) post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl[asic_index], self.task_stopping_event) if y_cable_presence[0] is True: @@ -1087,11 +1064,11 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port)) continue - if value == SFP_STATUS_INSERTED: + if value == sfp_status_helper.SFP_STATUS_INSERTED: helper_logger.log_info("Got SFP inserted event") # A plugin event will clear the error state. update_port_transceiver_status_table( - logical_port, status_tbl[asic_index], SFP_STATUS_INSERTED) + logical_port, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_INSERTED) helper_logger.log_info("receive plug in and update port sfp status table.") rc = post_port_sfp_info_to_db(logical_port, int_tbl[asic_index], transceiver_dict) # If we didn't get the sfp info, assuming the eeprom is not ready, give a try again. @@ -1103,10 +1080,10 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): post_port_dom_threshold_info_to_db(logical_port, dom_tbl[asic_index]) notify_media_setting(logical_port, transceiver_dict, app_port_tbl[asic_index]) transceiver_dict.clear() - elif value == SFP_STATUS_REMOVED: + elif value == sfp_status_helper.SFP_STATUS_REMOVED: helper_logger.log_info("Got SFP removed event") update_port_transceiver_status_table( - logical_port, status_tbl[asic_index], SFP_STATUS_REMOVED) + logical_port, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED) helper_logger.log_info("receive plug out and pdate port sfp status table.") del_port_sfp_dom_info_from_db(logical_port, int_tbl[asic_index], dom_tbl[asic_index]) else: @@ -1119,7 +1096,9 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): # In this case EEPROM is not accessible, so remove the DOM info # since it will be outdated if long time no update. # but will keep the interface info in the DB since it static. - del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index]) + if sfp_status_helper.is_error_block_eeprom_reading(value): + del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index]) + # Since ports could be connected to a mux cable, if there is a change event process the change for being on a Y cable Port y_cable_helper.change_ports_status_for_y_cable_change_event( diff --git a/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py b/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py new file mode 100644 index 000000000..087200ac6 --- /dev/null +++ b/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py @@ -0,0 +1,35 @@ +# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase +SFP_STATUS_REMOVED = '0' +SFP_STATUS_INSERTED = '1' + +# SFP error code dictinary, new elements can be added if new errors need to be supported. +SFP_STATUS_ERR_DICT = { + 2: 'SFP_STATUS_ERR_I2C_STUCK', + 4: 'SFP_STATUS_ERR_BAD_EEPROM', + 8: 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', + 16: 'SFP_STATUS_ERR_HIGH_TEMP', + 32: 'SFP_STATUS_ERR_BAD_CABLE' +} + +error_code_block_eeprom_reading = set((error_code for error_code in SFP_STATUS_ERR_DICT.keys())) +error_str_block_eeprom_reading = set((error for error in SFP_STATUS_ERR_DICT.values())) + + +def is_error_block_eeprom_reading(status): + int_status = int(status) + for error_code in error_code_block_eeprom_reading: + if int_status & error_code: + return True + return False + + +def detect_port_in_error_status(logical_port_name, status_tbl): + rec, fvp = status_tbl.get(logical_port_name) + if rec: + status_dict = dict(fvp) + if 'error' in status_dict: + for error in error_str_block_eeprom_reading: + if error in status_dict['error']: + return True + return False + diff --git a/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py b/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py index 0fdcf7e43..a19cfac34 100644 --- a/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py +++ b/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py @@ -10,6 +10,7 @@ from sonic_py_common import multi_asic from sonic_y_cable import y_cable from swsscommon import swsscommon +from . import sfp_status_helper SELECT_TIMEOUT = 1000 @@ -21,20 +22,6 @@ helper_logger = logger.Logger(SYSLOG_IDENTIFIER) - -# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase -SFP_STATUS_REMOVED = '0' -SFP_STATUS_INSERTED = '1' - -# SFP error code dictinary, new elements can be added if new errors need to be supported. -SFP_STATUS_ERR_DICT = { - 2: 'SFP_STATUS_ERR_I2C_STUCK', - 4: 'SFP_STATUS_ERR_BAD_EEPROM', - 8: 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', - 16: 'SFP_STATUS_ERR_HIGH_TEMP', - 32: 'SFP_STATUS_ERR_BAD_CABLE' -} - Y_CABLE_STATUS_NO_TOR_ACTIVE = 0 Y_CABLE_STATUS_TORA_ACTIVE = 1 Y_CABLE_STATUS_TORB_ACTIVE = 2 @@ -428,11 +415,11 @@ def change_ports_status_for_y_cable_change_event(port_dict, y_cable_presence, st continue if logical_port_name in port_table_keys[asic_index]: - if value == SFP_STATUS_INSERTED: + if value == sfp_status_helper.SFP_STATUS_INSERTED: helper_logger.log_info("Got SFP inserted event") check_identifier_presence_and_update_mux_table_entry( state_db, port_tbl, y_cable_tbl, static_tbl, mux_tbl, asic_index, logical_port_name, y_cable_presence) - elif value == SFP_STATUS_REMOVED or is_error_sfp_status(value): + elif value == sfp_status_helper.SFP_STATUS_REMOVED or sfp_status_helper.is_error_block_eeprom_reading(value): check_identifier_presence_and_delete_mux_table_entry( state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event) @@ -983,13 +970,6 @@ def post_mux_info_to_db(is_warm_start, stop_event=threading.Event()): post_port_mux_info_to_db(logical_port_name, mux_tbl[asic_index]) -def is_error_sfp_status(status): - int_status = int(status) - for error_code in SFP_STATUS_ERR_DICT.keys(): - if int_status & error_code: - return True - return False - # Thread wrapper class to update y_cable status periodically class YCableTableUpdateTask(object): def __init__(self): From 5ad12464a6ed3cce769fc6308e49d43e58104038 Mon Sep 17 00:00:00 2001 From: junchao Date: Wed, 28 Apr 2021 10:02:45 +0800 Subject: [PATCH 4/9] Remove unused comment lines --- sonic-xcvrd/tests/test_xcvrd.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 7105fbb36..b9c112f13 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -24,12 +24,9 @@ test_path = os.path.dirname(os.path.abspath(__file__)) modules_path = os.path.dirname(test_path) scripts_path = os.path.join(modules_path, "xcvrd") -#helper_file_path = os.path.join(scripts_path, "xcvrd_utilities"+"/y_cable_helper.py") sys.path.insert(0, modules_path) os.environ["XCVRD_UNIT_TESTING"] = "1" -#load_source('y_cable_helper', scripts_path + '/xcvrd_utilities/y_cable_helper.py') -#load_source('sfp_status_helper', scripts_path + '/xcvrd_utilities/sfp_status_helper.py') from xcvrd.xcvrd_utilities.y_cable_helper import * from xcvrd.xcvrd import * from xcvrd.xcvrd_utilities.sfp_status_helper import * From 5bbc490f0c6e5f4dad8bdeff5231dd1ea7d753b8 Mon Sep 17 00:00:00 2001 From: Junchao Chen Date: Mon, 17 May 2021 08:43:57 +0800 Subject: [PATCH 5/9] Fix LGTM warning --- sonic-xcvrd/xcvrd/xcvrd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 86f1fdf8d..4ff46e018 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -15,7 +15,6 @@ import threading import time - from enum import Enum from sonic_py_common import daemon_base, device_info, logger from sonic_py_common import multi_asic from swsscommon import swsscommon From fc36b01ed191308d14a753967757b8270719fcb8 Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Mon, 17 May 2021 08:47:50 +0800 Subject: [PATCH 6/9] Fix LGTM warning --- sonic-xcvrd/xcvrd/xcvrd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 86f1fdf8d..4ff46e018 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -15,7 +15,6 @@ import threading import time - from enum import Enum from sonic_py_common import daemon_base, device_info, logger from sonic_py_common import multi_asic from swsscommon import swsscommon From 1ad32df123d8dea27710888120d64f6380a3dcac Mon Sep 17 00:00:00 2001 From: junchao Date: Wed, 19 May 2021 10:10:01 +0800 Subject: [PATCH 7/9] Remove unused import --- sonic-xcvrd/tests/test_xcvrd.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index b9c112f13..12dc8d117 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -1,10 +1,7 @@ import os import sys -import subprocess -import pytest import unittest -from imp import load_source if sys.version_info >= (3, 3): from unittest.mock import MagicMock, patch else: @@ -27,8 +24,8 @@ sys.path.insert(0, modules_path) os.environ["XCVRD_UNIT_TESTING"] = "1" -from xcvrd.xcvrd_utilities.y_cable_helper import * from xcvrd.xcvrd import * +from xcvrd.xcvrd_utilities.y_cable_helper import * from xcvrd.xcvrd_utilities.sfp_status_helper import * From 5a5aac4bbb6b7baba934365dcfc080b218f2b749 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Mon, 31 May 2021 08:59:42 +0800 Subject: [PATCH 8/9] Handle the error status returned by platform APIs In case there is an error returned by platform API - Translate the error bitmap to error description for generic errors - Fetch the error description from sfp_error dict or via calling platform API get_error_description for vendor specific errors - Adjust unit test cases Signed-off-by: Stephen Sun --- sonic-xcvrd/tests/test_xcvrd.py | 49 ++---------- sonic-xcvrd/xcvrd/xcvrd.py | 74 ++++++++++--------- .../xcvrd_utilities/sfp_status_helper.py | 47 ++++++------ .../xcvrd/xcvrd_utilities/y_cable_helper.py | 10 ++- 4 files changed, 80 insertions(+), 100 deletions(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 12dc8d117..94ceb84fd 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -9,6 +9,7 @@ from sonic_py_common import daemon_base from swsscommon import swsscommon +from sonic_platform_base.sfp_base import SfpBase from .mock_swsscommon import Table @@ -315,46 +316,6 @@ def test_get_media_settings_key(self): assert result == ['MOLEX-1064141421', 'QSFP+'] # TODO: Ensure that error message was logged - def test_update_port_transceiver_status_table(self): - logical_port_name = "Ethernet0" - status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE) - update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_INSERTED) - entry = status_tbl.get(logical_port_name) - print(entry[1]) - print(entry[0][0]) - assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED - assert status_tbl.get(logical_port_name)[1][1] == 'N/A' - - update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_REMOVED) - assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_REMOVED - assert status_tbl.get(logical_port_name)[1][1] == 'N/A' - - error_dict = { - '3': 'SFP_STATUS_ERR_I2C_STUCK', - '5': 'SFP_STATUS_ERR_BAD_EEPROM', - '9': 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', - '17': 'SFP_STATUS_ERR_HIGH_TEMP', - '33': 'SFP_STATUS_ERR_BAD_CABLE' - } - - # Test single errors - for error_value, error_msg in error_dict.items(): - update_port_transceiver_status_table(logical_port_name, status_tbl, error_value, True) - assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED - assert status_tbl.get(logical_port_name)[1][1] == error_msg - - # Test multiple errors - update_port_transceiver_status_table(logical_port_name, status_tbl, '63', True) - assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED - error = status_tbl.get(logical_port_name)[1][1] - for error_msg in error_dict.values(): - assert error_msg in error - - # Test unsupported errors - status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE) - update_port_transceiver_status_table(logical_port_name, status_tbl, '1024', True) - assert status_tbl.get(logical_port_name) is None - def test_detect_port_in_error_status(self): class MockTable: def get(self, key): @@ -364,13 +325,13 @@ def get(self, key): status_tbl.get = MagicMock(return_value=(True, {'error': 'N/A'})) assert not detect_port_in_error_status(None, status_tbl) - status_tbl.get = MagicMock(return_value=(True, {'error': 'SFP_STATUS_ERR_I2C_STUCK'})) + status_tbl.get = MagicMock(return_value=(True, {'error': SfpBase.SFP_ERROR_DESCRIPTION_BLOCKING})) assert detect_port_in_error_status(None, status_tbl) def test_is_error_sfp_status(self): - error_values = ['3', '5', '9', '17', '33'] + error_values = [7, 11, 19, 35] for error_value in error_values: assert is_error_block_eeprom_reading(error_value) - assert not is_error_block_eeprom_reading(SFP_STATUS_INSERTED) - assert not is_error_block_eeprom_reading(SFP_STATUS_REMOVED) + assert not is_error_block_eeprom_reading(int(SFP_STATUS_INSERTED)) + assert not is_error_block_eeprom_reading(int(SFP_STATUS_REMOVED)) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 4ff46e018..610216a9e 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -176,11 +176,13 @@ def _wrapper_get_transceiver_change_event(timeout): if platform_chassis is not None: try: status, events = platform_chassis.get_change_event(timeout) - sfp_events = events['sfp'] - return status, sfp_events + sfp_events = events.get('sfp') + sfp_errors = events.get('sfp_error') + return status, sfp_events, sfp_errors except NotImplementedError: pass - return platform_sfputil.get_transceiver_change_event(timeout) + status, events = platform_sfputil.get_transceiver_change_event(timeout) + return status, events, None def _wrapper_get_sfp_type(physical_port): @@ -191,6 +193,14 @@ def _wrapper_get_sfp_type(physical_port): pass return None + +def _wrapper_get_sfp_error_description(physical_port): + if platform_chassis: + try: + return platform_chassis.get_sfp(physical_port).get_error_description() + except NotImplementedError: + pass + return None # Remove unnecessary unit from the raw data @@ -759,22 +769,9 @@ def waiting_time_compensation_with_sleep(time_start, time_to_wait): # Update port SFP status table on receiving SFP change event -def update_port_transceiver_status_table(logical_port_name, status_tbl, status, has_error=False): - if not has_error: - fvs = swsscommon.FieldValuePairs([('status', status), ('error', 'N/A')]) - status_tbl.set(logical_port_name, fvs) - else: - error_list = [] - int_status = int(status) - for error_code, error_msg in sfp_status_helper.SFP_STATUS_ERR_DICT.items(): - if error_code & int_status: - error_list.append(error_msg) - if error_list: - fvs = swsscommon.FieldValuePairs([('status', str(int_status & 1)), ('error', '|'.join(error_list))]) - status_tbl.set(logical_port_name, fvs) - else: - # SFP return unkown event, just ignore for now. - helper_logger.log_warning("Got unknown event {}, ignored".format(status)) +def update_port_transceiver_status_table(logical_port_name, status_tbl, status, error_descriptions='N/A'): + fvs = swsscommon.FieldValuePairs([('status', status), ('error', error_descriptions)]) + status_tbl.set(logical_port_name, fvs) # Delete port from SFP status table @@ -1003,7 +1000,7 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): while not stopping_event.is_set(): next_state = state time_start = time.time() - status, port_dict = _wrapper_get_transceiver_change_event(timeout) + status, port_dict, error_dict = _wrapper_get_transceiver_change_event(timeout) if not port_dict: continue helper_logger.log_debug("Got event {} {} in state {}".format(status, port_dict, state)) @@ -1083,21 +1080,32 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): helper_logger.log_info("Got SFP removed event") update_port_transceiver_status_table( logical_port, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED) - helper_logger.log_info("receive plug out and pdate port sfp status table.") + helper_logger.log_info("receive plug out and update port sfp status table.") del_port_sfp_dom_info_from_db(logical_port, int_tbl[asic_index], dom_tbl[asic_index]) else: - helper_logger.log_info("Got SFP Error event") - # Add port to error table to stop accessing eeprom of it - # If the port already in the error table, the stored error code will - # be updated to the new one. - update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value, True) - helper_logger.log_info("receive error update port sfp status table.") - # In this case EEPROM is not accessible, so remove the DOM info - # since it will be outdated if long time no update. - # but will keep the interface info in the DB since it static. - if sfp_status_helper.is_error_block_eeprom_reading(value): - del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index]) - + try: + error_bits = int(value) + helper_logger.log_info("Got SFP error event {}".format(value)) + + error_descriptions = sfp_status_helper.fetch_generic_error_description(error_bits) + + if sfp_status_helper.has_vendor_specific_error(error_bits): + if error_dict: + vendor_specific_error_description = error_dict.get(key) + else: + vendor_specific_error_description = _wrapper_get_sfp_error_description(key) + error_descriptions.append(vendor_specific_error_description) + + # Add error info to database + # Any existing error will be replaced by the new one. + update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value, '|'.join(error_descriptions)) + helper_logger.log_info("Receive error update port sfp status table.") + # In this case EEPROM is not accessible. The DOM info will be removed since it can be out-of-date. + # The interface info remains in the DB since it is static. + if sfp_status_helper.is_error_block_eeprom_reading(error_bits): + del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index]) + except (TypeError, ValueError) as e: + logger.log_error("Got unrecognized event {}, ignored".format(value)) # Since ports could be connected to a mux cable, if there is a change event process the change for being on a Y cable Port y_cable_helper.change_ports_status_for_y_cable_change_event( diff --git a/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py b/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py index 087200ac6..c65fed546 100644 --- a/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py +++ b/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py @@ -1,35 +1,38 @@ +from sonic_platform_base.sfp_base import SfpBase + # SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase SFP_STATUS_REMOVED = '0' SFP_STATUS_INSERTED = '1' # SFP error code dictinary, new elements can be added if new errors need to be supported. -SFP_STATUS_ERR_DICT = { - 2: 'SFP_STATUS_ERR_I2C_STUCK', - 4: 'SFP_STATUS_ERR_BAD_EEPROM', - 8: 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', - 16: 'SFP_STATUS_ERR_HIGH_TEMP', - 32: 'SFP_STATUS_ERR_BAD_CABLE' -} - -error_code_block_eeprom_reading = set((error_code for error_code in SFP_STATUS_ERR_DICT.keys())) -error_str_block_eeprom_reading = set((error for error in SFP_STATUS_ERR_DICT.values())) - - -def is_error_block_eeprom_reading(status): - int_status = int(status) - for error_code in error_code_block_eeprom_reading: - if int_status & error_code: - return True - return False +SFP_ERRORS_BLOCKING_MASK = 0x02 +SFP_ERRORS_GENERIC_MASK = 0x0000FFFE +SFP_ERRORS_VENDOR_SPECIFIC_MASK = 0xFFFF0000 + +def is_error_block_eeprom_reading(error_bits): + return 0 != (error_bits & SFP_ERRORS_BLOCKING_MASK) + + +def has_vendor_specific_error(error_bits): + return 0 != (error_bits & SFP_ERRORS_VENDOR_SPECIFIC_MASK) + + +def fetch_generic_error_description(error_bits): + generic_error_bits = (error_bits & SFP_ERRORS_GENERIC_MASK) + error_descriptions = [] + if generic_error_bits: + for error_bit, error_description in SfpBase.SFP_ERROR_BIT_TO_DESCRIPTION_DICT.items(): + if error_bit & generic_error_bits: + error_descriptions.append(error_description) + return error_descriptions def detect_port_in_error_status(logical_port_name, status_tbl): rec, fvp = status_tbl.get(logical_port_name) if rec: status_dict = dict(fvp) - if 'error' in status_dict: - for error in error_str_block_eeprom_reading: - if error in status_dict['error']: - return True + error = status_dict.get('error') + if SfpBase.SFP_ERROR_DESCRIPTION_BLOCKING in error: + return True return False diff --git a/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py b/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py index a19cfac34..e48084e52 100644 --- a/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py +++ b/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py @@ -419,11 +419,19 @@ def change_ports_status_for_y_cable_change_event(port_dict, y_cable_presence, st helper_logger.log_info("Got SFP inserted event") check_identifier_presence_and_update_mux_table_entry( state_db, port_tbl, y_cable_tbl, static_tbl, mux_tbl, asic_index, logical_port_name, y_cable_presence) - elif value == sfp_status_helper.SFP_STATUS_REMOVED or sfp_status_helper.is_error_block_eeprom_reading(value): + elif value == sfp_status_helper.SFP_STATUS_REMOVED: check_identifier_presence_and_delete_mux_table_entry( state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event) else: + try: + # Now that the value is in bitmap format, let's convert it to number + event_bits = int(value) + if sfp_status_helper.is_error_block_eeprom_reading(event_bits): + check_identifier_presence_and_delete_mux_table_entry( + state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event) + except: + pass # SFP return unkown event, just ignore for now. helper_logger.log_warning("Got unknown event {}, ignored".format(value)) continue From 08ab761b4fbd76bac837e03be90ddb7f41b8787f Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Fri, 4 Jun 2021 11:14:17 +0000 Subject: [PATCH 9/9] Fix review comments Signed-off-by: Stephen Sun --- sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py b/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py index c65fed546..789b761e4 100644 --- a/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py +++ b/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py @@ -32,7 +32,6 @@ def detect_port_in_error_status(logical_port_name, status_tbl): if rec: status_dict = dict(fvp) error = status_dict.get('error') - if SfpBase.SFP_ERROR_DESCRIPTION_BLOCKING in error: - return True + return SfpBase.SFP_ERROR_DESCRIPTION_BLOCKING in error return False