Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes in swss-common submodule to support NAT feature. #304

Merged
merged 5 commits into from
Nov 6, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ libswsscommon_la_SOURCES = \
macaddress.cpp \
netdispatcher.cpp \
netlink.cpp \
nfnetlink.cpp \
notificationconsumer.cpp \
notificationproducer.cpp \
linkcache.cpp \
Expand Down
5 changes: 5 additions & 0 deletions common/ipaddress.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ class IpAddress
);
}

inline bool operator!=(const IpAddress &o) const
{
return (! (*this == o) );
}

std::string to_string() const;

enum AddrScope {
Expand Down
209 changes: 209 additions & 0 deletions common/nfnetlink.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#include <string.h>
#include <errno.h>
#include <system_error>
#include "common/logger.h"
#include "common/netmsg.h"
#include "common/netdispatcher.h"
#include "common/netlink.h"
#include "common/nfnetlink.h"

using namespace swss;
using namespace std;

NfNetlink::NfNetlink(int pri) :
Selectable(pri)
{
m_socket = nl_socket_alloc();
if (!m_socket)
{
SWSS_LOG_ERROR("Unable to allocated nfnetlink socket");
throw system_error(make_error_code(errc::address_not_available),
"Unable to allocate nfnetlink socket");
}

nl_socket_disable_seq_check(m_socket);
nl_socket_disable_auto_ack(m_socket);

int err = nfnl_connect(m_socket);
if (err < 0)
{
SWSS_LOG_ERROR("Unable to connect nfnetlink socket: %s", nl_geterror(err));
nl_socket_free(m_socket);
m_socket = NULL;
throw system_error(make_error_code(errc::address_not_available),
"Unable to connect nfnetlink socket");
}

nl_socket_set_nonblocking(m_socket);

/* Set socket buffer size to 10 MB */
nl_socket_set_buffer_size(m_socket, 10485760, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make the buffer configurable from class user?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added an API to the class for setting the send/recv socket buffer size.


#ifdef NETFILTER_UNIT_TEST
/* For netlink packet tracing purposes */
nfPktsLogFile = fopen("/var/log/nf_netlink_pkts.log", "w");
if (nfPktsLogFile == NULL)
{
SWSS_LOG_ERROR("Unable to open netfilter netlink packets log file: %s", strerror(errno));
}
#endif
}

NfNetlink::~NfNetlink()
{
if (m_socket != NULL)
{
nl_close(m_socket);
nl_socket_free(m_socket);
}
}

void NfNetlink::registerRecvCallbacks(void)
{
#ifdef NETFILTER_UNIT_TEST
nl_socket_modify_cb(m_socket, NL_CB_MSG_IN, NL_CB_CUSTOM, onNetlinkRcv, this);
#endif

nl_socket_modify_cb(m_socket, NL_CB_VALID, NL_CB_CUSTOM, onNetlinkMsg, this);
}

void NfNetlink::registerGroup(int nfnlGroup)
{
int err = nl_socket_add_membership(m_socket, nfnlGroup);
if (err < 0)
{
SWSS_LOG_ERROR("Unable to register to netfilter group %d: %s", nfnlGroup,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it better to use SWSS_LOG_THROW - shortcut to log error and to throw?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

nl_geterror(err));
throw system_error(make_error_code(errc::address_not_available),
"Unable to register to netfilter group");
}
}
void NfNetlink::dumpRequest(int getCommand)
{
int err = nfnl_ct_dump_request(m_socket);
if (err < 0)
{
SWSS_LOG_ERROR("Unable to request netfilter conntrack dump: %s",
nl_geterror(err));
throw system_error(make_error_code(errc::address_not_available),
"Unable to request netfilter conntrack dump");
}
}

void NfNetlink::updateConnTrackEntry(struct nfnl_ct *ct)
{
if (nfnl_ct_add(m_socket, ct, NLM_F_REPLACE) < 0)
{
SWSS_LOG_ERROR("Failed to update conntrack object in the kernel");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest to SWSS_LOG_THROW or even better return a boolean whether API succeeded or not?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}
}

void NfNetlink::deleteConnTrackEntry(struct nfnl_ct *ct)
{
if (nfnl_ct_del(m_socket, ct, 0) < 0)
{
SWSS_LOG_ERROR("Failed to delete conntrack object in the kernel");
}
}

struct nfnl_ct *NfNetlink::getCtObject(const IpAddress &sourceIpAddr)
{
struct nfnl_ct *ct = nfnl_ct_alloc();
struct nl_addr *orig_src_ip;

if (! ct)
{
SWSS_LOG_ERROR("Unable to allocate memory for conntrack object");
return NULL;
}

nfnl_ct_set_family(ct, AF_INET);
if (nl_addr_parse(sourceIpAddr.to_string().c_str(), AF_INET, &orig_src_ip) == 0)
{
nfnl_ct_set_src(ct, 0, orig_src_ip);
}
else
{
SWSS_LOG_ERROR("Failed to parse orig src ip into conntrack object");
nfnl_ct_put(ct);
return NULL;
}

return ct;
}

struct nfnl_ct *NfNetlink::getCtObject(uint8_t protoType, const IpAddress &sourceIpAddr, uint16_t srcPort)
{
struct nfnl_ct *ct = nfnl_ct_alloc();
struct nl_addr *orig_src_ip;

if (! ct)
{
SWSS_LOG_ERROR("Unable to allocate memory for conntrack object");
return NULL;
}

nfnl_ct_set_family(ct, AF_INET);
nfnl_ct_set_proto(ct, protoType);

if (nl_addr_parse(sourceIpAddr.to_string().c_str(), AF_INET, &orig_src_ip) == 0)
{
nfnl_ct_set_src(ct, 0, orig_src_ip);
}
else
{
SWSS_LOG_ERROR("Failed to parse orig src ip into conntrack object");
nfnl_ct_put(ct);
return NULL;
}
nfnl_ct_set_src_port(ct, 0, srcPort);

return ct;
}

int NfNetlink::getFd()
{
return nl_socket_get_fd(m_socket);
}

uint64_t NfNetlink::readData()
{
int err;

do
{
err = nl_recvmsgs_default(m_socket);
}
while(err == -NLE_INTR); // Retry if the process was interrupted by a signal

if (err < 0)
{
if (err == -NLE_NOMEM)
SWSS_LOG_ERROR("netlink reports out of memory on reading a netfilter netlink socket. High possiblity of a lost message");
else if (err == -NLE_AGAIN)
SWSS_LOG_DEBUG("netlink reports NLE_AGAIN on reading a netfilter netlink socket");
else
SWSS_LOG_ERROR("netlink reports an error=%d on reading a netfilter netlink socket", err);
}
return 0;
}

int NfNetlink::onNetlinkMsg(struct nl_msg *msg, void *arg)
{
NetDispatcher::getInstance().onNetlinkMessage(msg);
return NL_OK;
}

#ifdef NETFILTER_UNIT_TEST
int NfNetlink::onNetlinkRcv(struct nl_msg *msg, void *arg)
{
NfNetlink *nfnlink = (NfNetlink *)arg;

if ((nfnlink == NULL) || (nfnlink->nfPktsLogFile == NULL))
return NL_OK;

nl_msg_dump(msg, nfnlink->nfPktsLogFile);

return NL_OK;
}
#endif
39 changes: 39 additions & 0 deletions common/nfnetlink.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef __NFNETLINK__
#define __NFNETLINK__

#include "ipaddress.h"
#include <netlink/netfilter/ct.h>
#include <netlink/netfilter/nfnl.h>

namespace swss {

class NfNetlink : public Selectable {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A lot of code duplicates Netlink class, did you consider to inherit from Netlink?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inheriting from Netlink class was considered, but the Netlink class is specific to NETLINK_ROUTE and initializes m_socket member to route socket in its constructor. But NfNetlink class is specific to NETFILTER socket and Netlink class constructor invocation would result in unnecessary calls specific Route socket.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the difference is not big, one thing you can do is to define virtual private "connect" method and override it for NfNetlink to call nfnl_connect.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stepanblyschak Are you saying, we have connect() as virtual function which is overridden by derived NfNetlink class, and call connect() in NetLink() construtor in the place of nl_connect() in the current code?
If so, since the virtual function connect() is called in the NetLink constructor, it would resolve to NetLink::connect() instead of NfNetlink::connect().
We need to get the connect() call out of the NetLink constructor and have the users of the NetLink class call the connect() explicitly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, you're correct

public:
NfNetlink(int pri = 0);
~NfNetlink() override;

void registerRecvCallbacks(void);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please remove void parameter?. Seems it is legacy from C-code and in this file such style is not used expect this method

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

void registerGroup(int nfnlGroup);
void dumpRequest(int getCommand);

int getFd() override;
uint64_t readData() override;

void updateConnTrackEntry(struct nfnl_ct *ct);
void deleteConnTrackEntry(struct nfnl_ct *ct);

private:
struct nfnl_ct *getCtObject(const IpAddress &sourceIpAddr);
struct nfnl_ct *getCtObject(uint8_t protoType, const IpAddress &sourceIpAddr, uint16_t srcPort);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are private and not used here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were added initially during development, now these can be removed. Good catch.


static int onNetlinkRcv(struct nl_msg *msg, void *arg);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

put onNetlinkRcv declaration in #ifdef NETFILTER_UNIT_TEST

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

static int onNetlinkMsg(struct nl_msg *msg, void *arg);

FILE *nfPktsLogFile;
nl_sock *m_socket;
};

}

#endif /* __NFNETLINK__ */

20 changes: 20 additions & 0 deletions common/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ namespace swss {
#define APP_SFLOW_SESSION_TABLE_NAME "SFLOW_SESSION_TABLE"
#define APP_SFLOW_SAMPLE_RATE_TABLE_NAME "SFLOW_SAMPLE_RATE_TABLE"

#define APP_NAT_TABLE_NAME "NAT_TABLE"
#define APP_NAPT_TABLE_NAME "NAPT_TABLE"
#define APP_NAT_TWICE_TABLE_NAME "NAT_TWICE_TABLE"
#define APP_NAPT_TWICE_TABLE_NAME "NAPT_TWICE_TABLE"
#define APP_NAT_GLOBAL_TABLE_NAME "NAT_GLOBAL_TABLE"
#define APP_NAPT_POOL_IP_TABLE_NAME "NAPT_POOL_IP_TABLE"

/***** TO BE REMOVED *****/

#define APP_TC_TO_QUEUE_MAP_TABLE_NAME "TC_TO_QUEUE_MAP_TABLE"
Expand Down Expand Up @@ -83,6 +90,12 @@ namespace swss {
#define COUNTERS_DEBUG_NAME_PORT_STAT_MAP "COUNTERS_DEBUG_NAME_PORT_STAT_MAP"
#define COUNTERS_DEBUG_NAME_SWITCH_STAT_MAP "COUNTERS_DEBUG_NAME_SWITCH_STAT_MAP"

#define COUNTERS_NAT_TABLE "COUNTERS_NAT"
#define COUNTERS_NAPT_TABLE "COUNTERS_NAPT"
#define COUNTERS_TWICE_NAT_TABLE "COUNTERS_TWICE_NAT"
#define COUNTERS_TWICE_NAPT_TABLE "COUNTERS_TWICE_NAPT"
#define COUNTERS_GLOBAL_NAT_TABLE "COUNTERS_GLOBAL_NAT"

#define PERIODIC_WATERMARKS_TABLE "PERIODIC_WATERMARKS"
#define PERSISTENT_WATERMARKS_TABLE "PERSISTENT_WATERMARKS"
#define USER_WATERMARKS_TABLE "USER_WATERMARKS"
Expand Down Expand Up @@ -200,6 +213,12 @@ namespace swss {
#define CFG_DEBUG_COUNTER_TABLE_NAME "DEBUG_COUNTER"
#define CFG_DEBUG_COUNTER_DROP_REASON_TABLE_NAME "DEBUG_COUNTER_DROP_REASON"

#define CFG_STATIC_NAT_TABLE_NAME "STATIC_NAT"
#define CFG_STATIC_NAPT_TABLE_NAME "STATIC_NAPT"
#define CFG_NAT_POOL_TABLE_NAME "NAT_POOL"
#define CFG_NAT_BINDINGS_TABLE_NAME "NAT_BINDINGS"
#define CFG_NAT_GLOBAL_TABLE_NAME "NAT_GLOBAL"

/***** STATE DATABASE *****/

#define STATE_SWITCH_CAPABILITY_TABLE_NAME "SWITCH_CAPABILITY_TABLE"
Expand All @@ -219,6 +238,7 @@ namespace swss {
#define STATE_VXLAN_TABLE_NAME "VXLAN_TABLE"
#define STATE_BGP_TABLE_NAME "BGP_STATE_TABLE"
#define STATE_DEBUG_COUNTER_CAPABILITIES_NAME "DEBUG_COUNTER_CAPABILITIES"
#define STATE_NAT_RESTORE_TABLE_NAME "NAT_RESTORE_TABLE"

/***** MISC *****/

Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ AC_HEADER_STDC
AM_PATH_PYTHON

AC_CHECK_LIB([hiredis], [redisConnect])
PKG_CHECK_MODULES([LIBNL], [libnl-3.0 libnl-genl-3.0 libnl-route-3.0])
PKG_CHECK_MODULES([LIBNL], [libnl-3.0 libnl-genl-3.0 libnl-route-3.0 libnl-nf-3.0])
CFLAGS="$CFLAGS $LIBNL_CFLAGS"
LIBS="$LIBS $LIBNL_LIBS"

Expand Down