Skip to content

Commit

Permalink
[recover] improve adaptive recover methods (#1652)
Browse files Browse the repository at this point in the history
- When a process is missing, recover with config_reload.
- When a port channel or VLAN link is down, recover with config_reload.
- Redirect the output of config reload and load minigraph to /dev/null.

Signed-off-by: Ying Xie <ying.xie@microsoft.com>
  • Loading branch information
yxieca authored May 12, 2020
1 parent 08748cb commit 6e6c81d
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
4 changes: 2 additions & 2 deletions tests/common/plugins/sanity_check/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

# Recover related definitions
RECOVER_METHODS = {
"config_reload": {"cmd": "config reload -y", "reboot": False, "adaptive": False, 'recover_wait': 60},
"load_minigraph": {"cmd": "config load_minigraph -y", "reboot": False, "adaptive": False, 'recover_wait': 60},
"config_reload": {"cmd": "bash -c 'config reload -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 60},
"load_minigraph": {"cmd": "bash -c 'config load_minigraph -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 60},
"reboot": {"cmd": "reboot", "reboot": True, "adaptive": False, 'recover_wait': 120},
"warm_reboot": {"cmd": "warm-reboot", "reboot": True, "adaptive": False, 'recover_wait': 120},
"fast_reboot": {"cmd": "fast_reboot", "reboot": True, "adaptive": False, 'recover_wait': 120},
Expand Down
28 changes: 19 additions & 9 deletions tests/common/plugins/sanity_check/recover.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,21 @@ def reboot_dut(dut, localhost, cmd, wait_time):


def __recover_interfaces(dut, fanouthosts, result, wait_time):
    """Attempt to bring the down ports listed in ``result`` back up.

    Ports whose name contains 'portchannel' or 'vlan' (case-insensitive)
    are not re-enabled individually; they only cause 'config_reload' to be
    returned as the follow-up action. Every other port is re-enabled with
    ``no_shutdown`` on the DUT and, when the lookup yields both a fanout
    and a fanout port, on the fanout side as well.

    Args:
        dut: DUT host object; must provide ``no_shutdown(port)``.
        fanouthosts: Passed through to ``fanout_switch_port_lookup``.
        result: Check result; ``result['down_ports']`` is iterated.
        wait_time: Seconds to wait once all ports have been processed.

    Returns:
        'config_reload' if at least one port channel / VLAN port was down,
        otherwise None.
    """
    follow_up = None

    for port in result['down_ports']:
        logging.info("Restoring port {}".format(port))

        lowered_name = str(port).lower()
        is_logical_port = 'portchannel' in lowered_name or 'vlan' in lowered_name
        if is_logical_port:
            # Logical interfaces are left alone here; the caller is asked to
            # run a config reload instead.
            follow_up = 'config_reload'
        else:
            fanout, fanout_port = fanout_switch_port_lookup(fanouthosts, port)
            if fanout and fanout_port:
                fanout.no_shutdown(fanout_port)
            dut.no_shutdown(port)

    # A single wait after all ports were handled (the message speaks of
    # "interface(s)"), regardless of how many ports were touched.
    wait(wait_time, msg="Wait {} seconds for interface(s) to restore.".format(wait_time))
    return follow_up


def __recover_services(dut, result):
Expand All @@ -57,16 +65,18 @@ def adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time):
if result['failed']:
logging.info("Restoring {}".format(result))
if result['check_item'] == 'interfaces':
__recover_interfaces(dut, fanouthosts, result, wait_time)
elif result['check_item'] in ['services', 'processes']:
action = __recover_services(dut, result)
# Only allow outstanding_action be overridden when it is
# None. In case the outstanding_action has already been
# been set to 'reboot'.
if not outstanding_action:
outstanding_action = action
action = __recover_interfaces(dut, fanouthosts, result, wait_time)
elif result['check_item'] == 'services':
action = __recover_services(dut, result)
elif result['check_item'] == 'processes':
action = 'config_reload'
else:
outstanding_action = 'reboot'
action = 'reboot'

# Any action can override no action or 'config_reload'.
# 'reboot' is last resort and cannot be overridden.
if action and (not outstanding_action or outstanding_action == 'config_reload'):
outstanding_action = action

if outstanding_action:
method = constants.RECOVER_METHODS[outstanding_action]
Expand Down

0 comments on commit 6e6c81d

Please sign in to comment.