From 1fcaa570047c6d1d0a7c77bf20136cfa4aea40c3 Mon Sep 17 00:00:00 2001 From: Arun Saravanan Balachandran <52521751+ArunSaravananBalachandran@users.noreply.github.com> Date: Tue, 26 Jan 2021 21:14:29 +0000 Subject: [PATCH] [pcied] Add PCIe AER stats collection (#100) In pcied, added support to collect AER stats belonging to different severities for AER supported PCIe devices and update it in STATE_DB. The key used to represent a PCIE device for storing its AER stats in STATE_DB is of the format PCIE_DEVICE|<Bus>:<Dev>.<Fn>. For every device, AER stats will be stored as key, value pairs where key is of the format <severity>|<AER field> and the device ID will be stored with key id. HLD: Azure/SONiC#678, Azure/SONiC#720 Depends on: Azure/sonic-platform-common#144 --- sonic-pcied/scripts/pcied | 51 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/sonic-pcied/scripts/pcied b/sonic-pcied/scripts/pcied index 0b636673b7f5..3c465c1daf01 100644 --- a/sonic-pcied/scripts/pcied +++ b/sonic-pcied/scripts/pcied @@ -8,13 +8,12 @@ try: import os import signal - import subprocess import sys import threading import swsssdk - from sonic_py_common.daemon_base import DaemonBase - from sonic_py_common import device_info + from sonic_py_common import daemon_base, device_info + from swsscommon import swsscommon except ImportError as e: raise ImportError(str(e) + " - required module not found") @@ -25,6 +24,7 @@ SYSLOG_IDENTIFIER = "pcied" PCIE_RESULT_REGEX = "PCIe Device Checking All Test" PCIE_TABLE_NAME = "PCIE_STATUS" +PCIE_DEVICE_TABLE_NAME = "PCIE_DEVICE" PCIE_CONF_FILE = 'pcie.yaml' @@ -36,7 +36,7 @@ REDIS_HOSTIP = "127.0.0.1" # -class DaemonPcied(DaemonBase): +class DaemonPcied(daemon_base.DaemonBase): def __init__(self, log_identifier): super(DaemonPcied, self).__init__(log_identifier) @@ -52,6 +52,31 @@ class DaemonPcied(DaemonBase): self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP) self.state_db.connect("STATE_DB") + state_db = 
daemon_base.db_connect("STATE_DB") + self.device_table = swsscommon.Table(state_db, PCIE_DEVICE_TABLE_NAME) + + # Load AER-fields into STATEDB + def update_aer_to_statedb(self, device_name, aer_stats): + + aer_fields = {} + + for field, value in aer_stats['correctable'].items(): + correctable_field = "correctable|" + field + aer_fields[correctable_field] = value + + for field, value in aer_stats['fatal'].items(): + fatal_field = "fatal|" + field + aer_fields[fatal_field] = value + + for field, value in aer_stats['non_fatal'].items(): + non_fatal_field = "non_fatal|" + field + aer_fields[non_fatal_field] = value + + if aer_fields: + formatted_fields = swsscommon.FieldValuePairs(list(aer_fields.items())) + self.device_table.set(device_name, formatted_fields) + else: + self.log_debug("PCIe device {} has no AER attributes".format(device_name)) # Check the PCIe devices def check_pcie_devices(self): @@ -84,6 +109,24 @@ class DaemonPcied(DaemonBase): self.update_state_db("PCIE_DEVICES", "status", "PASSED") self.log_info("PCIe device status check : PASSED") + # update AER-attributes to DB + for item in resultInfo: + if item["result"] == "Failed": + continue + + Bus = int(item["bus"], 16) + Dev = int(item["dev"], 16) + Fn = int(item["fn"], 16) + + device_name = "%02x:%02x.%d" % (Bus, Dev, Fn) + dev_id_path = '/sys/bus/pci/devices/0000:%s/device' % device_name + with open(dev_id_path, 'r') as fd: + Id = fd.read().strip() + + self.device_table.set(device_name, [('id', Id)]) + aer_stats = platform_pcieutil.get_pcie_aer_stats(bus=Bus, device=Dev, func=Fn) + self.update_aer_to_statedb(device_name, aer_stats) + def read_state_db(self, key1, key2): return self.state_db.get('STATE_DB', key1, key2)