From 19d4042025b6d62ca8d4ed0ef244a3c082f4e9e6 Mon Sep 17 00:00:00 2001 From: Renuka Manavalan <47282725+renukamanavalan@users.noreply.github.com> Date: Tue, 16 Mar 2021 20:32:24 -0700 Subject: [PATCH] Add self timeout and crash if exceeded. (#1502) Log callstack on timeout. What I did Add a self-imposed timeout and crash when it is exceeded. Before crashing, log the error and the call stack. How I did it Add a signal-based alarm and a handler that logs the error & call stack. How to verify it Artificially introduce a sleep (> TIMEOUT_SECONDS, which is 2 minutes) in the script in any function that is in the call path. Invoke the script. Watch it crash and note the error & stack logged in syslog. --- scripts/route_check.py | 19 ++++++++++++++++++- tests/route_check_test.py | 20 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/scripts/route_check.py b/scripts/route_check.py index e282d166af03..fbc223b6d976 100755 --- a/scripts/route_check.py +++ b/scripts/route_check.py @@ -43,6 +43,8 @@ import sys import syslog import time +import signal +import traceback from swsscommon import swsscommon @@ -53,6 +55,9 @@ SUBSCRIBE_WAIT_SECS = 1 +# Max of 2 minutes +TIMEOUT_SECONDS = 120 + UNIT_TESTING = 0 os.environ['PYTHONUNBUFFERED']='True' @@ -75,6 +80,14 @@ def __str__(self): report_level = syslog.LOG_ERR write_to_syslog = False +def handler(signum, frame): + print_message(syslog.LOG_ERR, + "Aborting routeCheck.py upon timeout signal after {} seconds". 
+ format(TIMEOUT_SECONDS)) + print_message(syslog.LOG_ERR, str(traceback.extract_stack())) + raise Exception("timeout occurred") + + def set_level(lvl, log_to_syslog): """ Sets the log level @@ -429,7 +442,7 @@ def main(): parser=argparse.ArgumentParser(description="Verify routes between APPL-DB & ASIC-DB are in sync") parser.add_argument('-m', "--mode", type=Level, choices=list(Level), default='ERR') parser.add_argument("-i", "--interval", type=int, default=0, help="Scan interval in seconds") - parser.add_argument("-s", "--log_to_syslog", action="store_true", default=False, help="Write message to syslog") + parser.add_argument("-s", "--log_to_syslog", action="store_true", default=True, help="Write message to syslog") args = parser.parse_args() set_level(args.mode, args.log_to_syslog) @@ -444,8 +457,12 @@ def main(): if UNIT_TESTING: interval = 1 + signal.signal(signal.SIGALRM, handler) + while True: + signal.alarm(TIMEOUT_SECONDS) ret, res= check_routes() + signal.alarm(0) if interval: time.sleep(interval) diff --git a/tests/route_check_test.py b/tests/route_check_test.py index c7c0d47b88fa..460fdd16d2fb 100644 --- a/tests/route_check_test.py +++ b/tests/route_check_test.py @@ -2,6 +2,7 @@ import json import os import sys +import time from unittest.mock import MagicMock, patch import pytest @@ -276,6 +277,7 @@ def table_side_effect(db, tbl): class mock_selector: TIMEOUT = 1 + EMULATE_HANG = False def __init__(self): self.select_state = 0 @@ -295,6 +297,9 @@ def select(self, timeout): state = self.select_state self.subs.update() + if mock_selector.EMULATE_HANG: + time.sleep(60) + if self.select_state == 0: self.select_state = self.TIMEOUT else: @@ -423,6 +428,21 @@ def test_server(self, mock_subs, mock_sel, mock_table, mock_conn): assert res == expect_res + # Test timeout + route_check.TIMEOUT_SECONDS = 5 + mock_selector.EMULATE_HANG = True + ex_raised = False + + try: + ret, res = route_check.main() + except Exception as err: + ex_raised = True + expect = "timeout 
occurred" + ex_str = str(err) + assert ex_str == expect, "{} != {}".format(ex_str, expect) + assert ex_raised, "Exception expected" + +