Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[bgpcfg]: Batch bgp updates #6006

Merged
merged 5 commits into from
Nov 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 30 additions & 37 deletions src/sonic-bgpcfgd/bgpcfgd/config.py
Original file line number Diff line number Diff line change
@@ -1,72 +1,66 @@
import os
import tempfile

from .vars import g_debug
from .log import log_crit, log_err
from .utils import run_command


class ConfigMgr(object):
""" The class represents frr configuration """
def __init__(self):
def __init__(self, frr):
self.frr = frr
self.current_config = None
self.current_config_raw = None
self.changes = ""
self.peer_groups_to_restart = []

def reset(self):
""" Reset stored config """
self.current_config = None
self.current_config_raw = None
self.changes = ""
self.peer_groups_to_restart = []

def update(self):
""" Read current config from FRR """
self.current_config = None
self.current_config_raw = None
ret_code, out, err = run_command(["vtysh", "-c", "show running-config"])
if ret_code != 0:
# FIXME: should we throw exception here?
log_crit("can't update running config: rc=%d out='%s' err='%s'" % (ret_code, out, err))
return
out = self.frr.get_config()
text = []
for line in out.split('\n'):
if line.lstrip().startswith('!'):
continue
text.append(line)
text += [" "] # Add empty line to have something to work on, if there is no text
self.current_config_raw = text
self.current_config = self.to_canonical(out) # FIXME: use test as an input
self.current_config = self.to_canonical(out) # FIXME: use text as an input

def push_list(self, cmdlist):
return self.push("\n".join(cmdlist))
"""
Prepare new changes for FRR. The changes should be committed by self.commit()
:param cmdlist: configuration change for FRR. Type: List of Strings
"""
self.changes += "\n".join(cmdlist) + "\n"

def push(self, cmd):
"""
Push new changes to FRR
Prepare new changes for FRR. The changes should be committed by self.commit()
:param cmd: configuration change for FRR. Type: String
:return: True if change was applied successfully, False otherwise
"""
return self.write(cmd)
self.changes += cmd + "\n"
return True

def restart_peer_groups(self, peer_groups):
"""
Schedule peer_groups for restart on commit
:param peer_groups: List of peer_groups
"""
self.peer_groups_to_restart.extend(peer_groups)

def write(self, cmd):
def commit(self):
"""
Write configuration change to FRR.
:param cmd: new configuration to write into FRR. Type: String
:return: True if change was applied successfully, False otherwise
"""
fd, tmp_filename = tempfile.mkstemp(dir='/tmp')
os.close(fd)
with open(tmp_filename, 'w') as fp:
fp.write("%s\n" % cmd)
command = ["vtysh", "-f", tmp_filename]
ret_code, out, err = run_command(command)
if not g_debug:
os.remove(tmp_filename)
if ret_code != 0:
err_tuple = str(cmd), ret_code, out, err
log_err("ConfigMgr::push(): can't push configuration '%s', rc='%d', stdout='%s', stderr='%s'" % err_tuple)
if ret_code == 0:
self.current_config = None # invalidate config
self.current_config_raw = None
return ret_code == 0
if self.changes.strip() == "":
return True
rc_write = self.frr.write(self.changes)
rc_restart = self.frr.restart_peer_groups(self.peer_groups_to_restart)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i am not sure if it is always correct to restart peer group after we make any change. for example, if we shutdown a bgp session, which peer group to restart?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.peer_groups_to_restart would be empty in 'shutdown a bgp session" situation and restart_peer_groups will do nothing in this case. I update self.peer_groups_to_restart in two cases now: BBR and allow_prefix.

self.reset()
return rc_write and rc_restart

def get_text(self):
return self.current_config_raw
Expand All @@ -89,7 +83,6 @@ def to_canonical(raw_config):
for line in lines:
n_spaces = ConfigMgr.count_spaces(line)
s_line = line.strip()
# assert(n_spaces == cur_offset or (n_spaces + 1) == cur_offset or (n_spaces - 1) == cur_offset)
if n_spaces == cur_offset:
cur_path[-1] = s_line
elif n_spaces > cur_offset:
Expand Down
70 changes: 70 additions & 0 deletions src/sonic-bgpcfgd/bgpcfgd/frr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import datetime
import time
import tempfile

from bgpcfgd.log import log_err, log_info, log_warn, log_crit
from .vars import g_debug
from .utils import run_command


class FRR(object):
"""Proxy object with FRR"""
def __init__(self, daemons):
self.daemons = daemons

def wait_for_daemons(self, seconds):
"""
Wait until FRR daemons are ready for requests
:param seconds: number of seconds to wait, until raise an error
"""
stop_time = datetime.datetime.now() + datetime.timedelta(seconds=seconds)
log_info("Start waiting for FRR daemons: %s" % str(datetime.datetime.now()))
while datetime.datetime.now() < stop_time:
ret_code, out, err = run_command(["vtysh", "-c", "show daemons"], hide_errors=True)
if ret_code == 0 and all(daemon in out for daemon in self.daemons):
log_info("All required daemons have connected to vtysh: %s" % str(datetime.datetime.now()))
return
else:
log_warn("Can't read daemon status from FRR: %s" % str(err))
time.sleep(0.1) # sleep 100 ms
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

time.sleep(0.1) # sleep 100 ms [](start = 12, length = 31)

What happens if no sleep?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Higher cpu usage. I think it is better to wait for 0.1 than to burn cpu by requests.

raise RuntimeError("FRR daemons hasn't been started in %d seconds" % seconds)

@staticmethod
def get_config():
ret_code, out, err = run_command(["vtysh", "-c", "show running-config"])
if ret_code != 0:
log_crit("can't update running config: rc=%d out='%s' err='%s'" % (ret_code, out, err))
return ""
return out

@staticmethod
def write(config_text):
fd, tmp_filename = tempfile.mkstemp(dir='/tmp')
os.close(fd)
with open(tmp_filename, 'w') as fp:
fp.write("%s\n" % config_text)
command = ["vtysh", "-f", tmp_filename]
ret_code, out, err = run_command(command)
if ret_code != 0:
err_tuple = tmp_filename, ret_code, out, err
log_err("ConfigMgr::commit(): can't push configuration from file='%s', rc='%d', stdout='%s', stderr='%s'" % err_tuple)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we want to keep this tmp file, or just put the tmpfile content into syslog?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tmp file content it is full config on the start. Kind of a lot. Do we want to see it in the syslog?

else:
if not g_debug:
os.remove(tmp_filename)
return ret_code == 0

@staticmethod
def restart_peer_groups(peer_groups):
""" Restart peer-groups which support BBR
:param peer_groups: List of peer_groups to restart
:return: True if restart of all peer-groups was successful, False otherwise
"""
res = True
for peer_group in sorted(peer_groups):
rc, out, err = run_command(["vtysh", "-c", "clear bgp peer-group %s soft in" % peer_group])
if rc != 0:
log_value = peer_group, rc, out, err
log_crit("Can't restart bgp peer-group '%s'. rc='%d', out='%s', err='%s'" % log_value)
res = res and (rc == 0)
return res
10 changes: 6 additions & 4 deletions src/sonic-bgpcfgd/bgpcfgd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,19 @@
from .managers_setsrc import ZebraSetSrc
from .runner import Runner, signal_handler
from .template import TemplateFabric
from .utils import wait_for_daemons, read_constants
from .utils import read_constants
from .frr import FRR
from .vars import g_debug


def do_work():
""" Main function """
wait_for_daemons(["bgpd", "zebra", "staticd"], seconds=20)
frr = FRR(["bgpd", "zebra", "staticd"])
frr.wait_for_daemons(seconds=20)
#
common_objs = {
'directory': Directory(),
'cfg_mgr': ConfigMgr(),
'cfg_mgr': ConfigMgr(frr),
'tf': TemplateFabric(),
'constants': read_constants(),
}
Expand All @@ -52,7 +54,7 @@ def do_work():
# BBR Manager
BBRMgr(common_objs, "CONFIG_DB", "BGP_BBR"),
]
runner = Runner()
runner = Runner(common_objs['cfg_mgr'])
for mgr in managers:
runner.add_manager(mgr)
runner.run()
Expand Down
36 changes: 11 additions & 25 deletions src/sonic-bgpcfgd/bgpcfgd/managers_allow_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .log import log_debug, log_info, log_err, log_warn
from .template import TemplateFabric
from .manager import Manager
from .utils import run_command


class BGPAllowListMgr(Manager):
""" This class initialize "AllowList" settings """
Expand Down Expand Up @@ -147,9 +147,10 @@ def __update_policy(self, deployment_id, community_value, prefixes_v4, prefixes_
cmds += self.__update_allow_route_map_entry(self.V4, names['pl_v4'], names['community'], names['rm_v4'])
cmds += self.__update_allow_route_map_entry(self.V6, names['pl_v6'], names['community'], names['rm_v6'])
if cmds:
rc = self.cfg_mgr.push_list(cmds)
rc = rc and self.__restart_peers(deployment_id)
log_debug("BGPAllowListMgr::__update_policy. The peers were updated: rc=%s" % rc)
self.cfg_mgr.push_list(cmds)
peer_groups = self.__find_peer_group_by_deployment_id(deployment_id)
self.cfg_mgr.restart_peers(peer_groups)
log_debug("BGPAllowListMgr::__update_policy. The peers configuration scheduled for updates")
else:
log_debug("BGPAllowListMgr::__update_policy. Nothing to update")
log_info("BGPAllowListMgr::Done")
Expand All @@ -176,9 +177,10 @@ def __remove_policy(self, deployment_id, community_value):
cmds += self.__remove_prefix_list(self.V6, names['pl_v6'])
cmds += self.__remove_community(names['community'])
if cmds:
rc = self.cfg_mgr.push_list(cmds)
rc = rc and self.__restart_peers(deployment_id)
log_debug("BGPAllowListMgr::__remove_policy. 'Allow list' policy was removed. rc:%s" % rc)
self.cfg_mgr.push_list(cmds)
peer_groups = self.__find_peer_group_by_deployment_id(deployment_id)
self.cfg_mgr.restart_peers(peer_groups)
log_debug("BGPAllowListMgr::__remove_policy. 'Allow list' policy was scheduled for removal")
else:
log_debug("BGPAllowListMgr::__remove_policy. Nothing to remove")
log_info('BGPAllowListMgr::Done')
Expand Down Expand Up @@ -531,7 +533,8 @@ def __get_route_map_calls(self, rms):
inside_name = result.group(1)
return rm_2_call

def __get_peer_group_to_restart(self, deployment_id, pg_2_rm, rm_2_call):
@staticmethod
def __get_peer_group_to_restart(deployment_id, pg_2_rm, rm_2_call):
"""
Get peer_groups which are assigned to deployment_id
:deployment_id: deployment_id number
Expand Down Expand Up @@ -560,23 +563,6 @@ def __find_peer_group_by_deployment_id(self, deployment_id):
ret = self.__get_peer_group_to_restart(deployment_id, pg_2_rm, rm_2_call)
return list(ret)

def __restart_peers(self, deployment_id):
"""
Restart peer-groups with requested deployment_id
:param deployment_id: deployment_id number
"""
log_info("BGPAllowListMgr::Restart peers with deployment_id=%d" % deployment_id)
peer_groups = self.__find_peer_group_by_deployment_id(deployment_id)
rv = True
if peer_groups:
for peer_group in peer_groups:
no_error, _, _ = run_command(["vtysh", "-c", "clear bgp peer-group %s soft in" % peer_group])
rv = no_error == 0 and rv
else:
no_error, _, _ = run_command(["vtysh", "-c", "clear bgp * soft in"])
rv = no_error == 0
return rv

def __get_enabled(self):
"""
Load enable/disabled property from constants
Expand Down
25 changes: 8 additions & 17 deletions src/sonic-bgpcfgd/bgpcfgd/managers_bbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from swsscommon import swsscommon

from .log import log_err, log_info, log_crit
from .log import log_err, log_info
from .manager import Manager
from .utils import run_command


class BBRMgr(Manager):
Expand Down Expand Up @@ -34,12 +33,10 @@ def set_handler(self, key, data):
return True
if not self.__set_validation(key, data):
return True
cmds = self.__set_prepare_config(data['status'])
rv = self.cfg_mgr.push_list(cmds)
if not rv:
log_crit("BBRMgr::can't apply configuration")
return True
self.__restart_peers()
cmds, peer_groups_to_restart = self.__set_prepare_config(data['status'])
self.cfg_mgr.push_list(cmds)
self.cfg_mgr.restart_peer_groups(peer_groups_to_restart)
log_info("BBRMgr::Scheduled BBR update")
return True

def del_handler(self, key):
Expand Down Expand Up @@ -112,12 +109,14 @@ def __set_prepare_config(self, status):
available_peer_groups = self.__get_available_peer_groups()
cmds = ["router bgp %s" % bgp_asn]
prefix_of_commands = "" if status == "enabled" else "no "
peer_groups_to_restart = set()
for af in ["ipv4", "ipv6"]:
cmds.append(" address-family %s" % af)
for pg_name in sorted(self.bbr_enabled_pgs.keys()):
if pg_name in available_peer_groups and af in self.bbr_enabled_pgs[pg_name]:
cmds.append(" %sneighbor %s allowas-in 1" % (prefix_of_commands, pg_name))
return cmds
peer_groups_to_restart.add(pg_name)
return cmds, list(peer_groups_to_restart)

def __get_available_peer_groups(self):
"""
Expand All @@ -132,11 +131,3 @@ def __get_available_peer_groups(self):
if m:
res.add(m.group(1))
return res

def __restart_peers(self):
""" Restart peer-groups which support BBR """
for peer_group in sorted(self.bbr_enabled_pgs.keys()):
rc, out, err = run_command(["vtysh", "-c", "clear bgp peer-group %s soft in" % peer_group])
if rc != 0:
log_value = peer_group, rc, out, err
log_crit("BBRMgr::Can't restart bgp peer-group '%s'. rc='%d', out='%s', err='%s'" % log_value)
Loading