Skip to content

Commit

Permalink
Handle dual ToR neighbor miss scenario (#2151)
Browse files Browse the repository at this point in the history
* Handle dual ToR neighbor miss scenario (#2137)

- When orchagent receives a neighbor update with a zero MAC:
    - If the neighbor IP is configured for a specific mux cable port in the MUX_CABLE table in CONFIG_DB, handle the neighbor normally (if active for the port, no action is needed. if standby, a tunnel route is created for the neighbor IP)
    - If the neighbor IP is not configured for a specific port, create a tunnel route for the IP to the peer switch.
        - When these neighbor IPs are eventually resolved, remove the tunnel route and handle the neighbor normally.
- When creating/initializing a mux cable object, set the internal state to standby to match the constructor behavior.

- Various formatting fixes inside test_mux.py
- Remove references to deprecated `@pytest.yield_fixture`
- Add dual ToR neighbor miss test cases:
    - Test cases and expected results are described in `mux_neigh_miss_tests.py`. These descriptions are used by the generic test runner `test_neighbor_miss` function to execute the test actions and verify expected results
    - Various setup fixtures and test info fixtures were added
    - Existing test cases were changed to use these setup fixtures for consistency

Signed-off-by: Lawrence Lee <lawlee@microsoft.com>
Co-authored-by: Sumukha Tumkur Vani <stumkurv@microsoft.com>
  • Loading branch information
theasianpianist and Sumukha Tumkur Vani authored Aug 20, 2022
1 parent 9eb4422 commit dec4570
Show file tree
Hide file tree
Showing 14 changed files with 773 additions and 153 deletions.
34 changes: 30 additions & 4 deletions neighsyncd/neighsync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb, DBConne
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME),
m_cfgInterfaceTable(cfgDb, CFG_INTF_TABLE_NAME),
m_cfgLagInterfaceTable(cfgDb, CFG_LAG_INTF_TABLE_NAME),
m_cfgVlanInterfaceTable(cfgDb, CFG_VLAN_INTF_TABLE_NAME)
m_cfgVlanInterfaceTable(cfgDb, CFG_VLAN_INTF_TABLE_NAME),
m_cfgPeerSwitchTable(cfgDb, CFG_PEER_SWITCH_TABLE_NAME)
{
m_AppRestartAssist = new AppRestartAssist(pipelineAppDB, "neighsyncd", "swss", DEFAULT_NEIGHSYNC_WARMSTART_TIMER);
if (m_AppRestartAssist)
Expand Down Expand Up @@ -108,14 +109,39 @@ void NeighSync::onMsg(int nlmsg_type, struct nl_object *obj)
return;
}

std::vector<std::string> peerSwitchKeys;
bool delete_key = false;
if ((nlmsg_type == RTM_DELNEIGH) || (state == NUD_INCOMPLETE) ||
(state == NUD_FAILED))
bool use_zero_mac = false;
m_cfgPeerSwitchTable.getKeys(peerSwitchKeys);
bool is_dualtor = peerSwitchKeys.size() > 0;
if (is_dualtor && (state == NUD_INCOMPLETE || state == NUD_FAILED))
{
SWSS_LOG_INFO("Unable to resolve %s, setting zero MAC", key.c_str());
use_zero_mac = true;

// Unresolved neighbor deletion on dual ToR devices must be handled
// separately, otherwise delete_key is never set to true
// and neighorch is never able to remove the neighbor
if (nlmsg_type == RTM_DELNEIGH)
{
delete_key = true;
}
}
else if ((nlmsg_type == RTM_DELNEIGH) ||
(state == NUD_INCOMPLETE) || (state == NUD_FAILED))
{
delete_key = true;
}

nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
if (use_zero_mac)
{
std::string zero_mac = "00:00:00:00:00:00";
strncpy(macStr, zero_mac.c_str(), zero_mac.length());
}
else
{
nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
}

/* Ignore neighbor entries with Broadcast Mac - Trigger for directed broadcast */
if (!delete_key && (MacAddress(macStr) == MacAddress("ff:ff:ff:ff:ff:ff")))
Expand Down
2 changes: 1 addition & 1 deletion neighsyncd/neighsync.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class NeighSync : public NetMsg
}

private:
Table m_stateNeighRestoreTable;
Table m_stateNeighRestoreTable, m_cfgPeerSwitchTable;
ProducerStateTable m_neighTable;
AppRestartAssist *m_AppRestartAssist;
Table m_cfgVlanInterfaceTable, m_cfgLagInterfaceTable, m_cfgInterfaceTable;
Expand Down
52 changes: 52 additions & 0 deletions orchagent/muxorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,37 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
return;
}

auto standalone_tunnel_neigh_it = standalone_tunnel_neighbors_.find(update.entry.ip_address);
// Handling zero MAC neighbor updates
if (!update.mac)
{
/* For neighbors that were previously resolvable but are now unresolvable,
* we expect such neighbor entries to be deleted prior to a zero MAC update
* arriving for that same neighbor.
*/

if (update.add)
{
if (standalone_tunnel_neigh_it == standalone_tunnel_neighbors_.end())
{
createStandaloneTunnelRoute(update.entry.ip_address);
}
/* If the MAC address in the neighbor entry is zero but the neighbor IP
* is already present in standalone_tunnel_neighbors_, assume we have already
* added a tunnel route for it and exit early
*/
return;
}
}
/* If the update operation for a neighbor contains a non-zero MAC, we must
* make sure to remove any existing tunnel routes to prevent conflicts.
* This block also covers the case of neighbor deletion.
*/
if (standalone_tunnel_neigh_it != standalone_tunnel_neighbors_.end())
{
removeStandaloneTunnelRoute(update.entry.ip_address);
}

for (auto it = mux_cable_tb_.begin(); it != mux_cable_tb_.end(); it++)
{
MuxCable* ptr = it->second.get();
Expand Down Expand Up @@ -1376,6 +1407,27 @@ bool MuxOrch::delOperation(const Request& request)
return true;
}

void MuxOrch::createStandaloneTunnelRoute(IpAddress neighborIp)
{
SWSS_LOG_INFO("Creating standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
sai_object_id_t tunnel_nexthop = getNextHopTunnelId(MUX_TUNNEL, mux_peer_switch_);
if (tunnel_nexthop == SAI_NULL_OBJECT_ID) {
SWSS_LOG_NOTICE("%s nexthop not created yet, ignoring tunnel route creation for %s", MUX_TUNNEL, neighborIp.to_string().c_str());
return;
}
IpPrefix pfx = neighborIp.to_string();
create_route(pfx, tunnel_nexthop);
standalone_tunnel_neighbors_.insert(neighborIp);
}

void MuxOrch::removeStandaloneTunnelRoute(IpAddress neighborIp)
{
SWSS_LOG_INFO("Removing standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
IpPrefix pfx = neighborIp.to_string();
remove_route(pfx);
standalone_tunnel_neighbors_.erase(neighborIp);
}

MuxCableOrch::MuxCableOrch(DBConnector *db, DBConnector *sdb, const std::string& tableName):
Orch2(db, tableName, request_),
app_tunnel_route_table_(db, APP_TUNNEL_ROUTE_TABLE_NAME),
Expand Down
8 changes: 8 additions & 0 deletions orchagent/muxorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,13 @@ class MuxOrch : public Orch2, public Observer, public Subject

bool getMuxPort(const MacAddress&, const string&, string&);

/***
* Methods for managing tunnel routes for neighbor IPs not associated
* with a specific mux cable
***/
void createStandaloneTunnelRoute(IpAddress neighborIp);
void removeStandaloneTunnelRoute(IpAddress neighborIp);

IpAddress mux_peer_switch_ = 0x0;
sai_object_id_t mux_tunnel_id_ = SAI_NULL_OBJECT_ID;

Expand All @@ -219,6 +226,7 @@ class MuxOrch : public Orch2, public Observer, public Subject
FdbOrch *fdb_orch_;

MuxCfgRequest request_;
std::set<IpAddress> standalone_tunnel_neighbors_;
};

const request_description_t mux_cable_request_description = {
Expand Down
20 changes: 19 additions & 1 deletion orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,16 @@ void NeighOrch::doTask(Consumer &consumer)
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end()
|| m_syncdNeighbors[neighbor_entry].mac != mac_address)
{
if (addNeighbor(neighbor_entry, mac_address))
// only for unresolvable neighbors that are new
if (!mac_address)
{
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
{
addZeroMacTunnelRoute(neighbor_entry, mac_address);
}
it = consumer.m_toSync.erase(it);
}
else if (addNeighbor(neighbor_entry, mac_address))
{
it = consumer.m_toSync.erase(it);
}
Expand Down Expand Up @@ -1716,3 +1725,12 @@ void NeighOrch::updateSrv6Nexthop(const NextHopKey &nh, const sai_object_id_t &n
m_syncdNextHops.erase(nh);
}
}
void NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)
{
SWSS_LOG_INFO("Creating tunnel route for neighbor %s", entry.ip_address.to_string().c_str());
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
NeighborUpdate update = {entry, mac, true};
mux_orch->update(SUBJECT_TYPE_NEIGH_CHANGE, static_cast<void *>(&update));
m_syncdNeighbors[entry] = { mac, false };
}

2 changes: 2 additions & 0 deletions orchagent/neighorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ class NeighOrch : public Orch, public Subject, public Observer

bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &);
void clearResolvedNeighborEntry(const NeighborEntry &);

void addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
};

#endif /* SWSS_NEIGHORCH_H */
22 changes: 13 additions & 9 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1794,15 +1794,15 @@ def update_dvs(log_path, new_dvs_env=[]):
dvs.runcmd("mv /etc/sonic/config_db.json.orig /etc/sonic/config_db.json")
dvs.ctn_restart()

@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def dvs(request, manage_dvs) -> DockerVirtualSwitch:
dvs_env = getattr(request.module, "DVS_ENV", [])
name = request.config.getoption("--dvsname")
log_path = name if name else request.module.__name__

return manage_dvs(log_path, dvs_env)

@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def vct(request):
vctns = request.config.getoption("--vctns")
topo = request.config.getoption("--topo")
Expand All @@ -1821,7 +1821,8 @@ def vct(request):
vct.get_logs(request.module.__name__)
vct.destroy()

@pytest.yield_fixture

@pytest.fixture
def testlog(request, dvs):
dvs.runcmd(f"logger -t pytest === start test {request.node.nodeid} ===")
yield testlog
Expand Down Expand Up @@ -1850,27 +1851,29 @@ def dvs_route(request, dvs) -> DVSRoute:

# FIXME: The rest of these also need to be reverted back to normal fixtures to
# appease the linter.
@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_lag_manager(request, dvs):
request.cls.dvs_lag = dvs_lag.DVSLag(dvs.get_asic_db(),
dvs.get_config_db(),
dvs)


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_vlan_manager(request, dvs):
request.cls.dvs_vlan = dvs_vlan.DVSVlan(dvs.get_asic_db(),
dvs.get_config_db(),
dvs.get_state_db(),
dvs.get_counters_db(),
dvs.get_app_db())

@pytest.yield_fixture(scope="class")

@pytest.fixture(scope="class")
def dvs_port_manager(request, dvs):
request.cls.dvs_port = dvs_port.DVSPort(dvs.get_asic_db(),
dvs.get_config_db())

@pytest.yield_fixture(scope="class")

@pytest.fixture(scope="class")
def dvs_mirror_manager(request, dvs):
request.cls.dvs_mirror = dvs_mirror.DVSMirror(dvs.get_asic_db(),
dvs.get_config_db(),
Expand All @@ -1879,7 +1882,7 @@ def dvs_mirror_manager(request, dvs):
dvs.get_app_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_policer_manager(request, dvs):
request.cls.dvs_policer = dvs_policer.DVSPolicer(dvs.get_asic_db(),
dvs.get_config_db())
Expand All @@ -1897,7 +1900,8 @@ def remove_dpb_config_file(dvs):
cmd = "mv /etc/sonic/config_db.json.bak /etc/sonic/config_db.json"
dvs.runcmd(cmd)

@pytest.yield_fixture(scope="module")

@pytest.fixture(scope="module")
def dpb_setup_fixture(dvs):
create_dpb_config_file(dvs)
if dvs.vct is None:
Expand Down
Loading

0 comments on commit dec4570

Please sign in to comment.