Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coordinator: Fix error policy routing table when pod has multi nics #3971

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions api/v1/agent/models/coordinator_config.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions api/v1/agent/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,6 @@ definitions:
type: boolean
detectGateway:
type: boolean
podNICs:
type: array
items:
type: string
required:
- overlayPodCIDR
- serviceCIDR
Expand Down
12 changes: 0 additions & 12 deletions api/v1/agent/server/embedded_spec.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 7 additions & 8 deletions cmd/coordinator/cmd/command_add.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
ipFamily: ipFamily,
currentInterface: args.IfName,
tuneMode: conf.Mode,
podNics: coordinatorConfig.PodNICs,
}
c.HijackCIDR = append(c.HijackCIDR, conf.ServiceCIDR...)
c.HijackCIDR = append(c.HijackCIDR, conf.HijackCIDR...)
Expand All @@ -120,6 +119,13 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
}
logger.Sugar().Debugf("Get current host netns: %v", c.hostNs.Path())

// checking if the nic is in up state
logger.Sugar().Debugf("checking if %s is in up state", args.IfName)
if err = c.checkNICState(args.IfName); err != nil {
logger.Error("error to check pod's nic state", zap.Error(err))
return fmt.Errorf("error to check pod's nic %s state: %v", args.Args, err)
}

// check if it's first time invoke
err = c.coordinatorModeAndFirstInvoke(logger, conf.PodDefaultCniNic)
if err != nil {
Expand Down Expand Up @@ -299,13 +305,6 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
return err
}

c.currentRuleTable = c.mustGetRuleNumber(c.podNics)
if c.currentRuleTable < 0 {
logger.Error("coordinator must be working with spiderpool: no spiderendpoint records found", zap.Strings("spiderNics", c.podNics))
return fmt.Errorf("coordinator must be working with spiderpool: no spiderendpoint records found")
}
logger.Debug("Get currentRuleTable", zap.Int("ruleTable", c.currentRuleTable))

allPodVethAddrs, err := networking.IPAddressByName(c.netns, defaultOverlayVethName, c.ipFamily)
if err != nil {
logger.Error(err.Error())
Expand Down
138 changes: 72 additions & 66 deletions cmd/coordinator/cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,39 +26,28 @@ type coordinator struct {
tuneMode Mode
hostVethName, podVethName, currentInterface string
v4HijackRouteGw, v6HijackRouteGw net.IP
HijackCIDR, podNics []string
HijackCIDR []string
netns, hostNs ns.NetNS
hostVethHwAddress, podVethHwAddress net.HardwareAddr
currentAddress []netlink.Addr
v4PodOverlayNicAddr, v6PodOverlayNicAddr *net.IPNet
hostIPRouteForPod []net.IP
}

func (c *coordinator) autoModeToSpecificMode(mode Mode, podFirstInterface string) error {
func (c *coordinator) autoModeToSpecificMode(mode Mode, podFirstInterface string, vethExist bool) error {
if mode != ModeAuto {
return nil
}

if c.currentInterface == podFirstInterface {
c.firstInvoke = true
c.tuneMode = ModeUnderlay
return nil
}

// veth0 must be present in underlay mode
vethExist, err := networking.CheckInterfaceExist(c.netns, defaultUnderlayVethName)
if err != nil {
return fmt.Errorf("failed to check interface: %v exist: %v", defaultUnderlayVethName, err)
}

if vethExist {
c.tuneMode = ModeUnderlay
} else {
c.tuneMode = ModeOverlay
// If spinderpool only assigns a NIC to the pod, Indicates that it is the first invoke
if c.podNics[0] == c.currentInterface {
c.firstInvoke = true
}
}

return nil
Expand All @@ -69,27 +58,46 @@ func (c *coordinator) autoModeToSpecificMode(mode Mode, podFirstInterface string
// mode, and which can't be called in first cni invoked by using multus's
// annotations: v1.multus-cni.io/default-network
func (c *coordinator) coordinatorModeAndFirstInvoke(logger *zap.Logger, podFirstInterface string) error {
var err error
switch c.tuneMode {
case ModeAuto:
if err = c.autoModeToSpecificMode(ModeAuto, podFirstInterface); err != nil {
vethExist, err := networking.CheckInterfaceExist(c.netns, defaultUnderlayVethName)
if err != nil {
return fmt.Errorf("failed to CheckInterfaceExist: %v", err)
}

links, err := networking.GetUPLinkList(c.netns)
if err != nil {
return fmt.Errorf("failed to get link list: %v", err)
}

for _, l := range links {
logger.Info("===debug link", zap.String("link", l.Attrs().Name))
}

if c.tuneMode == ModeAuto {
if err = c.autoModeToSpecificMode(ModeAuto, podFirstInterface, vethExist); err != nil {
return err
}
logger.Sugar().Infof("Successfully auto detect mode, change mode from auto to %v", c.tuneMode)
return nil
}

switch c.tuneMode {
case ModeUnderlay:
c.firstInvoke = c.currentInterface == podFirstInterface
// underlay mode can't work with calico/cilium(overlay)
if !c.firstInvoke {
var exist bool
exist, err = networking.CheckInterfaceExist(c.netns, defaultUnderlayVethName)
if err != nil {
return fmt.Errorf("failed to CheckInterfaceExist: %v", err)
}
if !c.firstInvoke && !vethExist {
return fmt.Errorf("when creating interface %s in underlay mode, it detects that the auxiliary interface %s was not created by previous interface. please enable coordinator plugin in previous interface", c.currentInterface, podFirstInterface)
}

if !exist {
return fmt.Errorf("when creating interface %s in underlay mode, it detects that the auxiliary interface %s was not created by previous interface. please enable coordinator plugin in previous interface", c.currentInterface, podFirstInterface)
}
// ensure that each NIC has a separate policy routing table number
if c.firstInvoke {
// keep table 100 for eth0, first non-eth0 nic is table 101
c.currentRuleTable = defaultPodRuleTable + 1
} else {
// for non-eth0 or non first-underlay nic, Policy routing
// table numbers are cumulative based on the number of NICs
// for example:
// there are veth0, eth0,net1,net2 nic, the policy routing table numbers
// of net2 is: 4 + 98 == 102.
c.currentRuleTable = len(links) + 98
}
return nil
case ModeOverlay:
Expand All @@ -98,16 +106,23 @@ func (c *coordinator) coordinatorModeAndFirstInvoke(logger *zap.Logger, podFirst
return fmt.Errorf("when creating interface %s in overlay mode, it detects that the current interface is first interface named %s, this plugin should not work for it. please modify in the CNI configuration", c.currentInterface, podFirstInterface)
}

exist, err := networking.CheckInterfaceExist(c.netns, defaultUnderlayVethName)
if err != nil {
return fmt.Errorf("failed to CheckInterfaceExist: %v", err)
}

if exist {
if vethExist {
return fmt.Errorf("when creating interface %s in overlay mode, it detects that the auxiliary interface %s of underlay mode exists. It seems that the previous interface work in underlay mode. ", c.currentInterface, defaultUnderlayVethName)
}

c.firstInvoke = c.podNics[0] == c.currentInterface
// if pod has only eth0 and net1, the first invoke is true
c.firstInvoke = len(links) == 2
if c.firstInvoke {
// keep table 100 for eth0, first non-eth0 nic is table 101
c.currentRuleTable = defaultPodRuleTable + 1
} else {
// for non-eth0 or non first-underlay nic, Policy routing
// table numbers are cumulative based on the number of NICs
// for example:
// there are eth0,net1,net2 nic, the policy routing table numbers
// of net2 is: 3 + 99 == 102.
c.currentRuleTable = 99 + len(links)
}
return nil
case ModeDisable:
return nil
Expand All @@ -116,21 +131,18 @@ func (c *coordinator) coordinatorModeAndFirstInvoke(logger *zap.Logger, podFirst
return fmt.Errorf("unknown tuneMode: %s", c.tuneMode)
}

// getRuleNumber return the number of rule table corresponding to the previous interface from the given interface.
// for example:
// input: net1, output: 100(eth0)
// input: net2, output: 101(net1)
func (c *coordinator) mustGetRuleNumber(spiderNics []string) int {
if len(spiderNics) == 0 {
return -1
}
func (c *coordinator) checkNICState(iface string) error {
return c.netns.Do(func(netNS ns.NetNS) error {
link, err := netlink.LinkByName(iface)
if err != nil {
return err
}

if c.currentInterface == defaultOverlayVethName {
return unix.RT_TABLE_MAIN
} else if spiderNics[0] == c.currentInterface {
return defaultPodRuleTable
}
return defaultPodRuleTable + len(spiderNics) - 1
if link.Attrs().Flags != net.FlagUp {
return netlink.LinkSetUp(link)
}
return nil
})
}

// setupVeth sets up a pair of virtual ethernet devices. move one to the host and other
Expand Down Expand Up @@ -306,22 +318,16 @@ func (c *coordinator) setupHijackRoutes(logger *zap.Logger, ruleTable int) error
src = c.v6PodOverlayNicAddr
}

if c.firstInvoke {
ruleTable = unix.RT_TABLE_MAIN
}

if err := networking.AddRoute(logger, ruleTable, c.ipFamily, netlink.SCOPE_UNIVERSE, c.podVethName, src, ipNet, c.v4HijackRouteGw, c.v6HijackRouteGw); err != nil {
logger.Error("failed to AddRoute for hijackCIDR", zap.String("Dst", ipNet.String()), zap.Error(err))
return fmt.Errorf("failed to AddRoute for hijackCIDR: %v", err)
}

if c.tuneMode == ModeOverlay && c.firstInvoke {
if err := networking.AddRoute(logger, unix.RT_TABLE_MAIN, c.ipFamily, netlink.SCOPE_UNIVERSE, c.podVethName, src, ipNet, c.v4HijackRouteGw, c.v6HijackRouteGw); err != nil {
logger.Error("failed to AddRoute for hijackCIDR", zap.String("Dst", ipNet.String()), zap.Error(err))
return fmt.Errorf("failed to AddRoute for hijackCIDR: %v", err)
}
logger.Debug("Add Route for hijackSubnet in pod successfully", zap.String("Dst", ipNet.String()))
}

logger.Debug("AddRouteTable for localCIDRs successfully", zap.String("hijick cidr", hijack), zap.Int("Table", ruleTable))
}
logger.Debug("AddRouteTable for localCIDRs successfully", zap.Strings("localCIDRs", c.HijackCIDR))

return nil
})
return err
Expand All @@ -347,7 +353,7 @@ func (c *coordinator) setupHostRoutes(logger *zap.Logger) error {
return err
}

if c.tuneMode == ModeOverlay && c.firstInvoke {
if c.firstInvoke {
if err = networking.AddRoute(logger, unix.RT_TABLE_MAIN, c.ipFamily, netlink.SCOPE_LINK, c.podVethName, src, ipNet, nil, nil); err != nil {
logger.Error("failed to AddRoute for ipAddressOnNode", zap.Error(err))
return err
Expand Down Expand Up @@ -488,13 +494,13 @@ func (c *coordinator) tunePodRoutes(logger *zap.Logger, configDefaultRouteNIC st
}

if c.tuneMode == ModeOverlay && c.firstInvoke {
// mv calico or cilium default route to table 500 to fix to the problem of
// mv calico or cilium default route to table 100 to fix to the problem of
// inconsistent routes, the pod forwards the response packet from net1 (macvlan)
// when it sends the response packet. but the request packet comes in eth0(calico).
// when it sends the response packet. but the request packet comes from eth0(calico).
// see https://github.com/spidernet-io/spiderpool/issues/3683

// copy to table 500,
podOverlayDefaultRouteRuleTable := c.hostRuleTable
// copy to table 100
podOverlayDefaultRouteRuleTable := defaultPodRuleTable
for idx := range defaultInterfaceAddress {
ipNet := networking.ConvertMaxMaskIPNet(defaultInterfaceAddress[idx].IP)
err = networking.AddFromRuleTable(ipNet, podOverlayDefaultRouteRuleTable)
Expand Down Expand Up @@ -555,7 +561,7 @@ func (c *coordinator) makeReplyPacketViaVeth(logger *zap.Logger) error {
}

for _, family := range ipFamily {
if err := networking.AddRuleTableWithMark(markInt, c.hostRuleTable, family); err != nil && !os.IsExist(err) {
if err := networking.AddRuleTableWithMark(markInt, defaultPodRuleTable, family); err != nil && !os.IsExist(err) {
return fmt.Errorf("failed to add rule table with mark: %v", err)
}

Expand All @@ -566,7 +572,7 @@ func (c *coordinator) makeReplyPacketViaVeth(logger *zap.Logger) error {
src = c.v6PodOverlayNicAddr
}

if err := networking.AddRoute(logger, c.hostRuleTable, family, netlink.SCOPE_UNIVERSE, c.podVethName, src, nil, c.v4HijackRouteGw, c.v6HijackRouteGw); err != nil {
if err := networking.AddRoute(logger, defaultPodRuleTable, family, netlink.SCOPE_UNIVERSE, c.podVethName, src, nil, c.v4HijackRouteGw, c.v6HijackRouteGw); err != nil {
return err
}
}
Expand Down
18 changes: 0 additions & 18 deletions cmd/spiderpool-agent/cmd/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig
ctx := params.HTTPRequest.Context()
crdClient := agentContext.CRDManager.GetClient()
podClient := agentContext.PodManager
epClient := agentContext.EndpointManager
kubevirtMgr := agentContext.KubevirtManager

var coordList spiderpoolv2beta1.SpiderCoordinatorList
Expand All @@ -46,8 +45,6 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig
}

var err error
var spNics []string
var se *spiderpoolv2beta1.SpiderEndpoint

var pod *corev1.Pod
pod, err = podClient.GetPodByName(ctx, params.GetCoordinatorConfig.PodNamespace, params.GetCoordinatorConfig.PodName, constant.UseCache)
Expand All @@ -57,19 +54,11 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig

isVMPod := false
// kubevirt vm pod corresponding SpiderEndpoint uses kubevirt VM/VMI name
endpointName := params.GetCoordinatorConfig.PodName
ownerReference := metav1.GetControllerOf(pod)
if ownerReference != nil && agentContext.Cfg.EnableKubevirtStaticIP && ownerReference.APIVersion == kubevirtv1.SchemeGroupVersion.String() && ownerReference.Kind == constant.KindKubevirtVMI {
endpointName = ownerReference.Name
isVMPod = true
}

// get spiderendpoint
se, err = epClient.GetEndpointByName(ctx, params.GetCoordinatorConfig.PodNamespace, endpointName, constant.UseCache)
if err != nil && !apierrors.IsNotFound(err) {
return daemonset.NewGetCoordinatorConfigFailure().WithPayload(models.Error(fmt.Sprintf("failed to get spiderendpoint %s/%s", params.GetCoordinatorConfig.PodNamespace, params.GetCoordinatorConfig.PodName)))
}

// cancel IP conflict detection for the kubevirt vm live migration new pod
detectIPConflict := *coord.Spec.DetectIPConflict
if detectIPConflict && isVMPod {
Expand All @@ -95,12 +84,6 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig
}
}

if se != nil {
for _, spip := range se.Status.Current.IPs {
spNics = append(spNics, spip.NIC)
}
}

var prefix string
if coord.Spec.PodMACPrefix != nil {
prefix = *coord.Spec.PodMACPrefix
Expand Down Expand Up @@ -129,7 +112,6 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig
TxQueueLen: int64(*coord.Spec.TxQueueLen),
DetectGateway: *coord.Spec.DetectGateway,
DetectIPConflict: detectIPConflict,
PodNICs: spNics,
}

if config.OverlayPodCIDR == nil {
Expand Down
Loading
Loading