Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[xcvrd] Add bitmap support for SFP error event #184

Merged
merged 11 commits into from
Jun 22, 2021
77 changes: 68 additions & 9 deletions sonic-xcvrd/tests/test_xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@
test_path = os.path.dirname(os.path.abspath(__file__))
modules_path = os.path.dirname(test_path)
scripts_path = os.path.join(modules_path, "xcvrd")
helper_file_path = os.path.join(scripts_path, "xcvrd_utilities"+"/y_cable_helper.py")
sys.path.insert(0, modules_path)

os.environ["XCVRD_UNIT_TESTING"] = "1"
load_source('y_cable_helper', scripts_path + '/xcvrd_utilities/y_cable_helper.py')
Junchao-Mellanox marked this conversation as resolved.
Show resolved Hide resolved
from y_cable_helper import *
from xcvrd.xcvrd_utilities.y_cable_helper import *
from xcvrd.xcvrd import *
from xcvrd.xcvrd_utilities.sfp_status_helper import *
Junchao-Mellanox marked this conversation as resolved.
Show resolved Hide resolved


class TestXcvrdScript(object):
Expand Down Expand Up @@ -219,9 +218,9 @@ def test_init_port_sfp_status_tbl(self):
init_port_sfp_status_tbl(stop_event)

@patch('xcvrd.xcvrd_utilities.y_cable_helper.y_cable_platform_sfputil', MagicMock(return_value=[0]))
@patch('y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0]))
@patch('y_cable_helper._wrapper_get_presence', MagicMock(return_value=True))
@patch('y_cable_helper.get_muxcable_info', MagicMock(return_value={'tor_active': 'self',
@patch('xcvrd.xcvrd_utilities.y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0]))
@patch('xcvrd.xcvrd_utilities.y_cable_helper._wrapper_get_presence', MagicMock(return_value=True))
@patch('xcvrd.xcvrd_utilities.y_cable_helper.get_muxcable_info', MagicMock(return_value={'tor_active': 'self',
'mux_direction': 'self',
'manual_switch_count': '7',
'auto_switch_count': '71',
Expand Down Expand Up @@ -258,9 +257,9 @@ def test_post_port_mux_info_to_db(self):
assert(rc != -1)

@patch('xcvrd.xcvrd_utilities.y_cable_helper.y_cable_platform_sfputil', MagicMock(return_value=[0]))
@patch('y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0]))
@patch('y_cable_helper._wrapper_get_presence', MagicMock(return_value=True))
@patch('y_cable_helper.get_muxcable_static_info', MagicMock(return_value={'read_side': 'self',
@patch('xcvrd.xcvrd_utilities.y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0]))
@patch('xcvrd.xcvrd_utilities.y_cable_helper._wrapper_get_presence', MagicMock(return_value=True))
@patch('xcvrd.xcvrd_utilities.y_cable_helper.get_muxcable_static_info', MagicMock(return_value={'read_side': 'self',
'nic_lane1_precursor1': '1',
'nic_lane1_precursor2': '-7',
'nic_lane1_maincursor': '-1',
Expand Down Expand Up @@ -318,3 +317,63 @@ def test_get_media_settings_key(self):
result = get_media_settings_key(0, xcvr_info_dict)
assert result == ['MOLEX-1064141421', 'QSFP+']
# TODO: Ensure that error message was logged

def test_update_port_transceiver_status_table(self):
    """Check what update_port_transceiver_status_table stores in the status table.

    Covers plain inserted/removed updates, each single error value, a
    combined error bitmap, and a value that matches no known error bit.
    """
    logical_port_name = "Ethernet0"
    status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE)

    def stored_status():
        # Value of the first field written for the port.
        return status_tbl.get(logical_port_name)[0][1]

    def stored_error():
        # Value of the second field written for the port.
        return status_tbl.get(logical_port_name)[1][1]

    # Plain status updates carry no error description.
    for plain_status in (SFP_STATUS_INSERTED, SFP_STATUS_REMOVED):
        update_port_transceiver_status_table(logical_port_name, status_tbl, plain_status)
        assert stored_status() == plain_status
        assert stored_error() == 'N/A'

    error_dict = {
        '3': 'SFP_STATUS_ERR_I2C_STUCK',
        '5': 'SFP_STATUS_ERR_BAD_EEPROM',
        '9': 'SFP_STATUS_ERR_UNSUPPORTED_CABLE',
        '17': 'SFP_STATUS_ERR_HIGH_TEMP',
        '33': 'SFP_STATUS_ERR_BAD_CABLE'
    }

    # Test single errors
    for error_value, error_msg in error_dict.items():
        update_port_transceiver_status_table(logical_port_name, status_tbl, error_value, True)
        assert stored_status() == SFP_STATUS_INSERTED
        assert stored_error() == error_msg

    # Test multiple errors: every description must appear in the stored field.
    update_port_transceiver_status_table(logical_port_name, status_tbl, '63', True)
    assert stored_status() == SFP_STATUS_INSERTED
    combined_error = stored_error()
    for error_msg in error_dict.values():
        assert error_msg in combined_error

    # Test unsupported errors: nothing may be written to a fresh table.
    fresh_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE)
    update_port_transceiver_status_table(logical_port_name, fresh_tbl, '1024', True)
    assert fresh_tbl.get(logical_port_name) is None

def test_detect_port_in_error_status(self):
    """Check detect_port_in_error_status against a clean and an errored table entry."""
    # (value of the 'error' field, expected detection result)
    scenarios = (
        ('N/A', False),
        ('SFP_STATUS_ERR_I2C_STUCK', True),
    )
    for error_field, expected in scenarios:
        # Only the table's get() method is consulted, so a stub is enough.
        status_tbl = MagicMock()
        status_tbl.get.return_value = (True, {'error': error_field})
        assert bool(detect_port_in_error_status(None, status_tbl)) is expected

def test_is_error_sfp_status(self):
    """Check is_error_block_eeprom_reading for error values and plain statuses."""
    # Each of these values has one known error bit set.
    for blocking_value in ('3', '5', '9', '17', '33'):
        assert is_error_block_eeprom_reading(blocking_value)

    # Plain inserted/removed statuses carry no error bit.
    for plain_status in (SFP_STATUS_INSERTED, SFP_STATUS_REMOVED):
        assert not is_error_block_eeprom_reading(plain_status)
77 changes: 31 additions & 46 deletions sonic-xcvrd/xcvrd/xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
import threading
import time

from enum import Enum
from sonic_py_common import daemon_base, device_info, logger
from sonic_py_common import multi_asic
from swsscommon import swsscommon

from .xcvrd_utilities import sfp_status_helper
from .xcvrd_utilities import y_cable_helper
except ImportError as e:
raise ImportError(str(e) + " - required module not found")
Expand All @@ -43,18 +43,6 @@
TIME_FOR_SFP_READY_SECS = 1
XCVRD_MAIN_THREAD_SLEEP_SECS = 60

# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase
SFP_STATUS_REMOVED = '0'
SFP_STATUS_INSERTED = '1'

# SFP error code enum, new elements can be added to the enum if new errors need to be supported.
SFP_STATUS_ERR_ENUM = Enum('SFP_STATUS_ERR_ENUM', ['SFP_STATUS_ERR_I2C_STUCK', 'SFP_STATUS_ERR_BAD_EEPROM',
'SFP_STATUS_ERR_UNSUPPORTED_CABLE', 'SFP_STATUS_ERR_HIGH_TEMP',
'SFP_STATUS_ERR_BAD_CABLE'], start=2)

# Convert the error code to string and store them in a set for convenience
errors_block_eeprom_reading = set(str(error_code.value) for error_code in SFP_STATUS_ERR_ENUM)

EVENT_ON_ALL_SFP = '-1'
# events definition
SYSTEM_NOT_READY = 'system_not_ready'
Expand Down Expand Up @@ -540,7 +528,7 @@ def recover_missing_sfp_table_entries(sfp_util, int_tbl, status_tbl, stop_event)
continue

keys = int_tbl[asic_index].getKeys()
if logical_port_name not in keys and not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]):
if logical_port_name not in keys and not sfp_status_helper.detect_port_in_error_status(logical_port_name, status_tbl[asic_index]):
post_port_sfp_info_to_db(logical_port_name, int_tbl[asic_index], transceiver_dict, stop_event)


Expand Down Expand Up @@ -771,30 +759,30 @@ def waiting_time_compensation_with_sleep(time_start, time_to_wait):
# Update port SFP status table on receiving SFP change event


def update_port_transceiver_status_table(logical_port_name, status_tbl, status):
fvs = swsscommon.FieldValuePairs([('status', status)])
status_tbl.set(logical_port_name, fvs)
def update_port_transceiver_status_table(logical_port_name, status_tbl, status, has_error=False):
    """Write a port's SFP status and error description to the status table.

    Args:
        logical_port_name: Key of the table entry to write.
        status_tbl: Table object; only its ``set(key, fvs)`` method is used.
        status: Status value as a string. Stored verbatim when ``has_error``
            is false; otherwise parsed as an integer error bitmap.
        has_error: When true, decode ``status`` against
            ``sfp_status_helper.SFP_STATUS_ERR_DICT`` and store the matching
            error descriptions joined with ``'|'``.

    A bitmap that is not numeric, or that matches no known error bit, is
    logged as an unknown event and nothing is written.
    """
    if not has_error:
        fvs = swsscommon.FieldValuePairs([('status', status), ('error', 'N/A')])
        status_tbl.set(logical_port_name, fvs)
        return

    try:
        int_status = int(status)
    except (TypeError, ValueError):
        # The caller forwards every event value that is neither inserted nor
        # removed, so a non-numeric value must be ignored rather than raise.
        helper_logger.log_warning("Got unknown event {}, ignored".format(status))
        return

    error_list = [
        error_msg
        for error_code, error_msg in sfp_status_helper.SFP_STATUS_ERR_DICT.items()
        if error_code & int_status
    ]
    if not error_list:
        # SFP returned an unknown event, just ignore it for now.
        helper_logger.log_warning("Got unknown event {}, ignored".format(status))
        return

    # Bit 0 of the bitmap is stored as the status; the remaining matched bits
    # are stored as their descriptions.
    fvs = swsscommon.FieldValuePairs([('status', str(int_status & 1)), ('error', '|'.join(error_list))])
    status_tbl.set(logical_port_name, fvs)


# Delete port from SFP status table


def delete_port_from_status_table(logical_port_name, status_tbl):
    """Remove the entry keyed by ``logical_port_name`` from ``status_tbl``.

    The removal is delegated to the table's ``_del`` method; nothing is returned.
    """
    status_tbl._del(logical_port_name)

# Check whether port in error status


def detect_port_in_error_status(logical_port_name, status_tbl):
    """Return True when the port's stored status is one of the blocking error codes.

    Looks the port up in ``status_tbl`` and compares its ``'status'`` field
    against ``errors_block_eeprom_reading``. A port without a table entry is
    reported as not in error.
    """
    found, field_values = status_tbl.get(logical_port_name)
    if not found:
        return False
    return dict(field_values)['status'] in errors_block_eeprom_reading

# Init TRANSCEIVER_STATUS table


Expand Down Expand Up @@ -824,16 +812,16 @@ def init_port_sfp_status_tbl(stop_event=threading.Event()):
physical_port_list = logical_port_name_to_physical_port_list(logical_port_name)
if physical_port_list is None:
helper_logger.log_error("No physical ports found for logical port '{}'".format(logical_port_name))
update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_REMOVED)
update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED)

for physical_port in physical_port_list:
if stop_event.is_set():
break

if not _wrapper_get_presence(physical_port):
update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_REMOVED)
update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED)
else:
update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_INSERTED)
update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_INSERTED)

#
# Helper classes ===============================================================
Expand Down Expand Up @@ -872,7 +860,7 @@ def task_worker(self, y_cable_presence):
logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name))
continue

if not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]):
if not sfp_status_helper.detect_port_in_error_status(logical_port_name, status_tbl[asic_index]):
post_port_dom_info_to_db(logical_port_name, dom_tbl[asic_index], self.task_stopping_event)
post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl[asic_index], self.task_stopping_event)
if y_cable_presence[0] is True:
Expand Down Expand Up @@ -1075,11 +1063,11 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence):
logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port))
continue

if value == SFP_STATUS_INSERTED:
if value == sfp_status_helper.SFP_STATUS_INSERTED:
helper_logger.log_info("Got SFP inserted event")
# A plugin event will clear the error state.
update_port_transceiver_status_table(
logical_port, status_tbl[asic_index], SFP_STATUS_INSERTED)
logical_port, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_INSERTED)
helper_logger.log_info("receive plug in and update port sfp status table.")
rc = post_port_sfp_info_to_db(logical_port, int_tbl[asic_index], transceiver_dict)
# If we didn't get the sfp info, assuming the eeprom is not ready, give a try again.
Expand All @@ -1091,28 +1079,25 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence):
post_port_dom_threshold_info_to_db(logical_port, dom_tbl[asic_index])
notify_media_setting(logical_port, transceiver_dict, app_port_tbl[asic_index])
transceiver_dict.clear()
elif value == SFP_STATUS_REMOVED:
elif value == sfp_status_helper.SFP_STATUS_REMOVED:
helper_logger.log_info("Got SFP removed event")
update_port_transceiver_status_table(
logical_port, status_tbl[asic_index], SFP_STATUS_REMOVED)
logical_port, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED)
helper_logger.log_info("receive plug out and pdate port sfp status table.")
del_port_sfp_dom_info_from_db(logical_port, int_tbl[asic_index], dom_tbl[asic_index])
elif value in errors_block_eeprom_reading:
else:
helper_logger.log_info("Got SFP Error event")
# Add port to error table to stop accessing eeprom of it
# If the port already in the error table, the stored error code will
# be updated to the new one.
update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value)
update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value, True)
helper_logger.log_info("receive error update port sfp status table.")
# In this case EEPROM is not accessible, so remove the DOM info
# since it will be outdated if long time no update.
# but will keep the interface info in the DB since it static.
del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index])
if sfp_status_helper.is_error_block_eeprom_reading(value):
del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index])

else:
# SFP returned an unknown event; just ignore it for now.
helper_logger.log_warning("Got unknown event {}, ignored".format(value))
continue

# Since ports could be connected to a mux cable, if there is a change event process the change for being on a Y cable Port
y_cable_helper.change_ports_status_for_y_cable_change_event(
Expand Down
35 changes: 35 additions & 0 deletions sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase
SFP_STATUS_REMOVED = '0'
SFP_STATUS_INSERTED = '1'

# SFP error code dictionary, keyed by the bit value of each error.
# New elements can be added if new errors need to be supported.
SFP_STATUS_ERR_DICT = {
    2: 'SFP_STATUS_ERR_I2C_STUCK',
    4: 'SFP_STATUS_ERR_BAD_EEPROM',
    8: 'SFP_STATUS_ERR_UNSUPPORTED_CABLE',
    16: 'SFP_STATUS_ERR_HIGH_TEMP',
    32: 'SFP_STATUS_ERR_BAD_CABLE'
}

# Error codes and error descriptions that block EEPROM reading. Both sets
# currently mirror SFP_STATUS_ERR_DICT in full.
error_code_block_eeprom_reading = set(SFP_STATUS_ERR_DICT)
error_str_block_eeprom_reading = set(SFP_STATUS_ERR_DICT.values())


def is_error_block_eeprom_reading(status):
    """Return True when ``status`` has at least one blocking error bit set.

    Args:
        status: Status value, normally a decimal string such as ``'3'``.

    Returns:
        True if any code in ``error_code_block_eeprom_reading`` is set in the
        integer value of ``status``; False otherwise, including when
        ``status`` cannot be converted to an integer.
    """
    try:
        int_status = int(status)
    except (TypeError, ValueError):
        # Callers pass arbitrary event values here; one that is not a number
        # cannot carry an error bit, so report "no blocking error".
        return False
    return any(int_status & error_code for error_code in error_code_block_eeprom_reading)


def detect_port_in_error_status(logical_port_name, status_tbl):
    """Return True when the port's table entry reports a blocking error.

    Args:
        logical_port_name: Key to look up in the table.
        status_tbl: Table object whose ``get(key)`` returns a
            ``(found, field_values)`` pair; ``field_values`` must be
            accepted by ``dict()``.

    Returns:
        True if the entry exists and its ``'error'`` field contains any
        description from ``error_str_block_eeprom_reading``; False otherwise.
    """
    rec, fvp = status_tbl.get(logical_port_name)
    if not rec:
        return False

    error_field = dict(fvp).get('error')
    if not error_field:
        # Missing, None or empty error field: nothing is reported.
        return False
    return any(error in error_field for error in error_str_block_eeprom_reading)

26 changes: 3 additions & 23 deletions sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sonic_py_common import multi_asic
from sonic_y_cable import y_cable
from swsscommon import swsscommon
from . import sfp_status_helper


SELECT_TIMEOUT = 1000
Expand All @@ -21,27 +22,6 @@

helper_logger = logger.Logger(SYSLOG_IDENTIFIER)


# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase
SFP_STATUS_REMOVED = '0'
SFP_STATUS_INSERTED = '1'

# SFP error codes, stored as strings. Can add more as needed.
SFP_STATUS_ERR_I2C_STUCK = '2'
SFP_STATUS_ERR_BAD_EEPROM = '3'
SFP_STATUS_ERR_UNSUPPORTED_CABLE = '4'
SFP_STATUS_ERR_HIGH_TEMP = '5'
SFP_STATUS_ERR_BAD_CABLE = '6'

# Store the error codes in a set for convenience
errors_block_eeprom_reading = {
SFP_STATUS_ERR_I2C_STUCK,
SFP_STATUS_ERR_BAD_EEPROM,
SFP_STATUS_ERR_UNSUPPORTED_CABLE,
SFP_STATUS_ERR_HIGH_TEMP,
SFP_STATUS_ERR_BAD_CABLE
}

Y_CABLE_STATUS_NO_TOR_ACTIVE = 0
Y_CABLE_STATUS_TORA_ACTIVE = 1
Y_CABLE_STATUS_TORB_ACTIVE = 2
Expand Down Expand Up @@ -435,11 +415,11 @@ def change_ports_status_for_y_cable_change_event(port_dict, y_cable_presence, st
continue

if logical_port_name in port_table_keys[asic_index]:
if value == SFP_STATUS_INSERTED:
if value == sfp_status_helper.SFP_STATUS_INSERTED:
helper_logger.log_info("Got SFP inserted event")
check_identifier_presence_and_update_mux_table_entry(
state_db, port_tbl, y_cable_tbl, static_tbl, mux_tbl, asic_index, logical_port_name, y_cable_presence)
elif value == SFP_STATUS_REMOVED or value in errors_block_eeprom_reading:
elif value == sfp_status_helper.SFP_STATUS_REMOVED or sfp_status_helper.is_error_block_eeprom_reading(value):
check_identifier_presence_and_delete_mux_table_entry(
state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event)

Expand Down