From 057640237867d0bb6b7757edc03d2e5f1ea93c7b Mon Sep 17 00:00:00 2001 From: Arun Saravanan Balachandran Date: Tue, 13 Oct 2020 12:39:03 +0530 Subject: [PATCH 1/3] [pcied] Add PCIe AER stats collection --- sonic-pcied/scripts/pcied | 67 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/sonic-pcied/scripts/pcied b/sonic-pcied/scripts/pcied index 31312dd4c..acdef867c 100644 --- a/sonic-pcied/scripts/pcied +++ b/sonic-pcied/scripts/pcied @@ -8,10 +8,10 @@ try: import os import signal - import subprocess import sys import threading + import redis import swsssdk from sonic_py_common.daemon_base import DaemonBase from sonic_py_common import device_info @@ -30,6 +30,7 @@ PCIE_CONF_FILE = 'pcie.yaml' PCIED_MAIN_THREAD_SLEEP_SECS = 60 REDIS_HOSTIP = "127.0.0.1" +STATE_DB_INDEX = 6 # # Daemon ======================================================================= @@ -53,6 +54,49 @@ class DaemonPcied(DaemonBase): self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP) self.state_db.connect("STATE_DB") + # Load AER-fields into STATEDB + def update_aer_to_statedb(self, device, device_name, pcie_hash_name): + + aer_fields = {} + client = redis.Redis(db=STATE_DB_INDEX) + + # construct AER sysfs filepath + correctable_path = os.path.join(device, "aer_dev_correctable") + fatal_path = os.path.join(device, "aer_dev_fatal") + non_fatal_path = os.path.join(device, "aer_dev_nonfatal") + + # update AER-correctable fields + if os.path.isfile(correctable_path): + with open(correctable_path, 'r') as fh: + lines = fh.readlines() + for line in lines: + correctable_field, value = line.split() + correctable_field = "correctable|" + correctable_field + aer_fields[correctable_field] = value + + # update AER-Fatal fields + if os.path.isfile(fatal_path): + with open(fatal_path, 'r') as fh: + lines = fh.readlines() + for line in lines: + fatal_field, value = line.split() + fatal_field = "fatal|" + fatal_field + aer_fields[fatal_field] = value + + # update AER-Fatal fields + if os.path.isfile(non_fatal_path): + with open(non_fatal_path, 'r') as fh: + lines = fh.readlines() + for line in lines: + non_fatal_field, value = line.split() + non_fatal_field = "non_fatal|" + non_fatal_field + aer_fields[non_fatal_field] = value + + if aer_fields: + client.hmset(pcie_hash_name, aer_fields) + else: + self.log_debug("PCIe device {} has no AER attriutes".format(device_name)) + # Check the PCIe devices def check_pcie_devices(self): try: @@ -84,6 +128,27 @@ class DaemonPcied(DaemonBase): self.update_state_db("PCIE_DEVICES", "status", "PASSED") self.log_info("PCIe device status check : PASSED") + # update AER-attributes to DB + for item in resultInfo: + if item["result"] == "Failed": + continue + + Bus = item["bus"] + Dev = item["dev"] + Fn = item["fn"] + Id = item["id"] + + # construct sysfs device path + device_path = "/sys/bus/pci/devices/0000:%s:%s.%s" % (Bus, Dev, Fn) + device = os.path.join(device_path, "device") + + if not os.path.isfile(device): + continue + + device_name = "0x%s|%s:%s.%s" % (Id, Bus, Dev, Fn) + pcie_dev_key = "PCIE_DEVICE|" + device_name + self.update_aer_to_statedb(device_path, device_name, pcie_dev_key) + def read_state_db(self, key1, key2): return self.state_db.get('STATE_DB', key1, key2) From 61bed35e29f4ae2b3ff54811546f3ab165d9d27b Mon Sep 17 00:00:00 2001 From: Arun Saravanan Balachandran Date: Mon, 16 Nov 2020 14:21:50 +0530 Subject: [PATCH 2/3] Use get_pcie_aer_stats, swsscommon and remove redis --- sonic-pcied/scripts/pcied | 72 +++++++++++++-------------------------- 1 file changed, 23 insertions(+), 49 deletions(-) diff --git a/sonic-pcied/scripts/pcied b/sonic-pcied/scripts/pcied index acdef867c..34eaa3f61 100644 --- a/sonic-pcied/scripts/pcied +++ b/sonic-pcied/scripts/pcied @@ -11,10 +11,9 @@ try: import sys import threading - import redis import swsssdk - from sonic_py_common.daemon_base import DaemonBase - from sonic_py_common import device_info + from sonic_py_common import daemon_base, device_info + from swsscommon import swsscommon except ImportError as e: raise ImportError(str(e) + " - required module not found") @@ -25,19 +24,19 @@ SYSLOG_IDENTIFIER = "pcied" PCIE_RESULT_REGEX = "PCIe Device Checking All Test" PCIE_TABLE_NAME = "PCIE_STATUS" +PCIE_DEVICE_TABLE_NAME = "PCIE_DEVICE" PCIE_CONF_FILE = 'pcie.yaml' PCIED_MAIN_THREAD_SLEEP_SECS = 60 REDIS_HOSTIP = "127.0.0.1" -STATE_DB_INDEX = 6 # # Daemon ======================================================================= # -class DaemonPcied(DaemonBase): +class DaemonPcied(daemon_base.DaemonBase): def __init__(self, log_identifier): super(DaemonPcied, self).__init__(log_identifier) @@ -53,47 +52,29 @@ class DaemonPcied(DaemonBase): self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP) self.state_db.connect("STATE_DB") + state_db = daemon_base.db_connect("STATE_DB") + self.device_table = swsscommon.Table(state_db, PCIE_DEVICE_TABLE_NAME) # Load AER-fields into STATEDB - def update_aer_to_statedb(self, device, device_name, pcie_hash_name): + def update_aer_to_statedb(self, device_name, aer_stats): aer_fields = {} - client = redis.Redis(db=STATE_DB_INDEX) - - # construct AER sysfs filepath - correctable_path = os.path.join(device, "aer_dev_correctable") - fatal_path = os.path.join(device, "aer_dev_fatal") - non_fatal_path = os.path.join(device, "aer_dev_nonfatal") - - # update AER-correctable fields - if os.path.isfile(correctable_path): - with open(correctable_path, 'r') as fh: - lines = fh.readlines() - for line in lines: - correctable_field, value = line.split() - correctable_field = "correctable|" + correctable_field - aer_fields[correctable_field] = value - - # update AER-Fatal fields - if os.path.isfile(fatal_path): - with open(fatal_path, 'r') as fh: - lines = fh.readlines() - for line in lines: - fatal_field, value = line.split() - fatal_field = "fatal|" + fatal_field - aer_fields[fatal_field] = value - - # update AER-Fatal fields - if os.path.isfile(non_fatal_path): - with open(non_fatal_path, 'r') as fh: - lines = fh.readlines() - for line in lines: - non_fatal_field, value = line.split() - non_fatal_field = "non_fatal|" + non_fatal_field - aer_fields[non_fatal_field] = value + + for field, value in aer_stats['correctable'].items(): + correctable_field = "correctable|" + field + aer_fields[correctable_field] = value + + for field, value in aer_stats['fatal'].items(): + fatal_field = "fatal|" + field + aer_fields[fatal_field] = value + + for field, value in aer_stats['non_fatal'].items(): + non_fatal_field = "non_fatal|" + field + aer_fields[non_fatal_field] = value if aer_fields: - client.hmset(pcie_hash_name, aer_fields) + formatted_fields = swsscommon.FieldValuePairs(list(aer_fields.items())) + self.device_table.set(device_name, formatted_fields) else: self.log_debug("PCIe device {} has no AER attriutes".format(device_name)) @@ -138,16 +119,9 @@ class DaemonPcied(DaemonBase): Fn = item["fn"] Id = item["id"] - # construct sysfs device path - device_path = "/sys/bus/pci/devices/0000:%s:%s.%s" % (Bus, Dev, Fn) - device = os.path.join(device_path, "device") - - if not os.path.isfile(device): - continue - device_name = "0x%s|%s:%s.%s" % (Id, Bus, Dev, Fn) - pcie_dev_key = "PCIE_DEVICE|" + device_name - self.update_aer_to_statedb(device_path, device_name, pcie_dev_key) + aer_stats = platform_pcieutil.get_pcie_aer_stats(bus=int(Bus, 16), device=int(Dev, 16), func=int(Fn, 16)) + self.update_aer_to_statedb(device_name, aer_stats) def read_state_db(self, key1, key2): return self.state_db.get('STATE_DB', key1, key2) From 9126bc98de50d173fbd0c3b80a32e8f7f2a3a56d Mon Sep 17 00:00:00 2001 From: Arun Saravanan Balachandran Date: Thu, 10 Dec 2020 12:19:37 +0530 Subject: [PATCH 3/3] Change PCIe device key format, add 'id' field --- sonic-pcied/scripts/pcied | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sonic-pcied/scripts/pcied b/sonic-pcied/scripts/pcied index 34eaa3f61..885eba27f 100644 --- a/sonic-pcied/scripts/pcied +++ b/sonic-pcied/scripts/pcied @@ -114,13 +114,17 @@ class DaemonPcied(daemon_base.DaemonBase): if item["result"] == "Failed": continue - Bus = item["bus"] - Dev = item["dev"] - Fn = item["fn"] - Id = item["id"] + Bus = int(item["bus"], 16) + Dev = int(item["dev"], 16) + Fn = int(item["fn"], 16) - device_name = "0x%s|%s:%s.%s" % (Id, Bus, Dev, Fn) - aer_stats = platform_pcieutil.get_pcie_aer_stats(bus=int(Bus, 16), device=int(Dev, 16), func=int(Fn, 16)) + device_name = "%02x:%02x.%d" % (Bus, Dev, Fn) + dev_id_path = '/sys/bus/pci/devices/0000:%s/device' % device_name + with open(dev_id_path, 'r') as fd: + Id = fd.read().strip() + + self.device_table.set(device_name, [('id', Id)]) + aer_stats = platform_pcieutil.get_pcie_aer_stats(bus=Bus, device=Dev, func=Fn) self.update_aer_to_statedb(device_name, aer_stats) def read_state_db(self, key1, key2):