Skip to content

Commit

Permalink
Merge pull request #3971 from cyclinder/cherry_pick_1.0_policy_number
Browse files Browse the repository at this point in the history
coordinator: Fix error policy routing table when pod has multi nics
  • Loading branch information
cyclinder authored Aug 29, 2024
2 parents 083770b + eb8c6ff commit c1fb610
Show file tree
Hide file tree
Showing 16 changed files with 403 additions and 162 deletions.
3 changes: 0 additions & 3 deletions api/v1/agent/models/coordinator_config.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions api/v1/agent/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,6 @@ definitions:
type: boolean
detectGateway:
type: boolean
podNICs:
type: array
items:
type: string
required:
- overlayPodCIDR
- serviceCIDR
Expand Down
12 changes: 0 additions & 12 deletions api/v1/agent/server/embedded_spec.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 7 additions & 8 deletions cmd/coordinator/cmd/command_add.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
ipFamily: ipFamily,
currentInterface: args.IfName,
tuneMode: conf.Mode,
podNics: coordinatorConfig.PodNICs,
}
c.HijackCIDR = append(c.HijackCIDR, conf.ServiceCIDR...)
c.HijackCIDR = append(c.HijackCIDR, conf.HijackCIDR...)
Expand All @@ -120,6 +119,13 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
}
logger.Sugar().Debugf("Get current host netns: %v", c.hostNs.Path())

// checking if the nic is in up state
logger.Sugar().Debugf("checking if %s is in up state", args.IfName)
if err = c.checkNICState(args.IfName); err != nil {
logger.Error("error to check pod's nic state", zap.Error(err))
return fmt.Errorf("error to check pod's nic %s state: %v", args.Args, err)
}

// check if it's first time invoke
err = c.coordinatorModeAndFirstInvoke(logger, conf.PodDefaultCniNic)
if err != nil {
Expand Down Expand Up @@ -299,13 +305,6 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
return err
}

c.currentRuleTable = c.mustGetRuleNumber(c.podNics)
if c.currentRuleTable < 0 {
logger.Error("coordinator must be working with spiderpool: no spiderendpoint records found", zap.Strings("spiderNics", c.podNics))
return fmt.Errorf("coordinator must be working with spiderpool: no spiderendpoint records found")
}
logger.Debug("Get currentRuleTable", zap.Int("ruleTable", c.currentRuleTable))

allPodVethAddrs, err := networking.IPAddressByName(c.netns, defaultOverlayVethName, c.ipFamily)
if err != nil {
logger.Error(err.Error())
Expand Down
138 changes: 72 additions & 66 deletions cmd/coordinator/cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,39 +26,28 @@ type coordinator struct {
tuneMode Mode
hostVethName, podVethName, currentInterface string
v4HijackRouteGw, v6HijackRouteGw net.IP
HijackCIDR, podNics []string
HijackCIDR []string
netns, hostNs ns.NetNS
hostVethHwAddress, podVethHwAddress net.HardwareAddr
currentAddress []netlink.Addr
v4PodOverlayNicAddr, v6PodOverlayNicAddr *net.IPNet
hostIPRouteForPod []net.IP
}

func (c *coordinator) autoModeToSpecificMode(mode Mode, podFirstInterface string) error {
func (c *coordinator) autoModeToSpecificMode(mode Mode, podFirstInterface string, vethExist bool) error {
if mode != ModeAuto {
return nil
}

if c.currentInterface == podFirstInterface {
c.firstInvoke = true
c.tuneMode = ModeUnderlay
return nil
}

// veth0 must be present in underlay mode
vethExist, err := networking.CheckInterfaceExist(c.netns, defaultUnderlayVethName)
if err != nil {
return fmt.Errorf("failed to check interface: %v exist: %v", defaultUnderlayVethName, err)
}

if vethExist {
c.tuneMode = ModeUnderlay
} else {
c.tuneMode = ModeOverlay
// If spiderpool only assigns a single NIC to the pod, it indicates that this is the first invocation
if c.podNics[0] == c.currentInterface {
c.firstInvoke = true
}
}

return nil
Expand All @@ -69,27 +58,46 @@ func (c *coordinator) autoModeToSpecificMode(mode Mode, podFirstInterface string
// mode, and which can't be called in first cni invoked by using multus's
// annotations: v1.multus-cni.io/default-network
func (c *coordinator) coordinatorModeAndFirstInvoke(logger *zap.Logger, podFirstInterface string) error {
var err error
switch c.tuneMode {
case ModeAuto:
if err = c.autoModeToSpecificMode(ModeAuto, podFirstInterface); err != nil {
vethExist, err := networking.CheckInterfaceExist(c.netns, defaultUnderlayVethName)
if err != nil {
return fmt.Errorf("failed to CheckInterfaceExist: %v", err)
}

links, err := networking.GetUPLinkList(c.netns)
if err != nil {
return fmt.Errorf("failed to get link list: %v", err)
}

for _, l := range links {
logger.Info("===debug link", zap.String("link", l.Attrs().Name))
}

if c.tuneMode == ModeAuto {
if err = c.autoModeToSpecificMode(ModeAuto, podFirstInterface, vethExist); err != nil {
return err
}
logger.Sugar().Infof("Successfully auto detect mode, change mode from auto to %v", c.tuneMode)
return nil
}

switch c.tuneMode {
case ModeUnderlay:
c.firstInvoke = c.currentInterface == podFirstInterface
// underlay mode can't work with calico/cilium(overlay)
if !c.firstInvoke {
var exist bool
exist, err = networking.CheckInterfaceExist(c.netns, defaultUnderlayVethName)
if err != nil {
return fmt.Errorf("failed to CheckInterfaceExist: %v", err)
}
if !c.firstInvoke && !vethExist {
return fmt.Errorf("when creating interface %s in underlay mode, it detects that the auxiliary interface %s was not created by previous interface. please enable coordinator plugin in previous interface", c.currentInterface, podFirstInterface)
}

if !exist {
return fmt.Errorf("when creating interface %s in underlay mode, it detects that the auxiliary interface %s was not created by previous interface. please enable coordinator plugin in previous interface", c.currentInterface, podFirstInterface)
}
// ensure that each NIC has a separate policy routing table number
if c.firstInvoke {
// keep table 100 for eth0, first non-eth0 nic is table 101
c.currentRuleTable = defaultPodRuleTable + 1
} else {
// For a non-eth0 or non-first underlay NIC, the policy routing
// table number is cumulative based on the number of NICs.
// For example:
// if the pod has veth0, eth0, net1 and net2, the policy routing table number
// for net2 is: 4 + 98 == 102.
c.currentRuleTable = len(links) + 98
}
return nil
case ModeOverlay:
Expand All @@ -98,16 +106,23 @@ func (c *coordinator) coordinatorModeAndFirstInvoke(logger *zap.Logger, podFirst
return fmt.Errorf("when creating interface %s in overlay mode, it detects that the current interface is first interface named %s, this plugin should not work for it. please modify in the CNI configuration", c.currentInterface, podFirstInterface)
}

exist, err := networking.CheckInterfaceExist(c.netns, defaultUnderlayVethName)
if err != nil {
return fmt.Errorf("failed to CheckInterfaceExist: %v", err)
}

if exist {
if vethExist {
return fmt.Errorf("when creating interface %s in overlay mode, it detects that the auxiliary interface %s of underlay mode exists. It seems that the previous interface work in underlay mode. ", c.currentInterface, defaultUnderlayVethName)
}

c.firstInvoke = c.podNics[0] == c.currentInterface
// if pod has only eth0 and net1, the first invoke is true
c.firstInvoke = len(links) == 2
if c.firstInvoke {
// keep table 100 for eth0, first non-eth0 nic is table 101
c.currentRuleTable = defaultPodRuleTable + 1
} else {
// For a non-eth0 or non-first underlay NIC, the policy routing
// table number is cumulative based on the number of NICs.
// For example:
// if the pod has eth0, net1 and net2, the policy routing table number
// for net2 is: 3 + 99 == 102.
}
return nil
case ModeDisable:
return nil
Expand All @@ -116,21 +131,18 @@ func (c *coordinator) coordinatorModeAndFirstInvoke(logger *zap.Logger, podFirst
return fmt.Errorf("unknown tuneMode: %s", c.tuneMode)
}

// getRuleNumber return the number of rule table corresponding to the previous interface from the given interface.
// for example:
// input: net1, output: 100(eth0)
// input: net2, output: 101(net1)
func (c *coordinator) mustGetRuleNumber(spiderNics []string) int {
if len(spiderNics) == 0 {
return -1
}
func (c *coordinator) checkNICState(iface string) error {
return c.netns.Do(func(netNS ns.NetNS) error {
link, err := netlink.LinkByName(iface)
if err != nil {
return err
}

if c.currentInterface == defaultOverlayVethName {
return unix.RT_TABLE_MAIN
} else if spiderNics[0] == c.currentInterface {
return defaultPodRuleTable
}
return defaultPodRuleTable + len(spiderNics) - 1
if link.Attrs().Flags != net.FlagUp {
return netlink.LinkSetUp(link)
}
return nil
})
}

// setupVeth sets up a pair of virtual ethernet devices. move one to the host and other
Expand Down Expand Up @@ -306,22 +318,16 @@ func (c *coordinator) setupHijackRoutes(logger *zap.Logger, ruleTable int) error
src = c.v6PodOverlayNicAddr
}

if c.firstInvoke {
ruleTable = unix.RT_TABLE_MAIN
}

if err := networking.AddRoute(logger, ruleTable, c.ipFamily, netlink.SCOPE_UNIVERSE, c.podVethName, src, ipNet, c.v4HijackRouteGw, c.v6HijackRouteGw); err != nil {
logger.Error("failed to AddRoute for hijackCIDR", zap.String("Dst", ipNet.String()), zap.Error(err))
return fmt.Errorf("failed to AddRoute for hijackCIDR: %v", err)
}

if c.tuneMode == ModeOverlay && c.firstInvoke {
if err := networking.AddRoute(logger, unix.RT_TABLE_MAIN, c.ipFamily, netlink.SCOPE_UNIVERSE, c.podVethName, src, ipNet, c.v4HijackRouteGw, c.v6HijackRouteGw); err != nil {
logger.Error("failed to AddRoute for hijackCIDR", zap.String("Dst", ipNet.String()), zap.Error(err))
return fmt.Errorf("failed to AddRoute for hijackCIDR: %v", err)
}
logger.Debug("Add Route for hijackSubnet in pod successfully", zap.String("Dst", ipNet.String()))
}

logger.Debug("AddRouteTable for localCIDRs successfully", zap.String("hijick cidr", hijack), zap.Int("Table", ruleTable))
}
logger.Debug("AddRouteTable for localCIDRs successfully", zap.Strings("localCIDRs", c.HijackCIDR))

return nil
})
return err
Expand All @@ -347,7 +353,7 @@ func (c *coordinator) setupHostRoutes(logger *zap.Logger) error {
return err
}

if c.tuneMode == ModeOverlay && c.firstInvoke {
if c.firstInvoke {
if err = networking.AddRoute(logger, unix.RT_TABLE_MAIN, c.ipFamily, netlink.SCOPE_LINK, c.podVethName, src, ipNet, nil, nil); err != nil {
logger.Error("failed to AddRoute for ipAddressOnNode", zap.Error(err))
return err
Expand Down Expand Up @@ -488,13 +494,13 @@ func (c *coordinator) tunePodRoutes(logger *zap.Logger, configDefaultRouteNIC st
}

if c.tuneMode == ModeOverlay && c.firstInvoke {
// mv calico or cilium default route to table 500 to fix to the problem of
// Move the calico or cilium default route to table 100 to fix the problem of
// inconsistent routes: the pod forwards the response packet from net1 (macvlan),
// but the request packet came in via eth0 (calico).
// see https://github.com/spidernet-io/spiderpool/issues/3683

// copy to table 500,
podOverlayDefaultRouteRuleTable := c.hostRuleTable
// copy to table 100
podOverlayDefaultRouteRuleTable := defaultPodRuleTable
for idx := range defaultInterfaceAddress {
ipNet := networking.ConvertMaxMaskIPNet(defaultInterfaceAddress[idx].IP)
err = networking.AddFromRuleTable(ipNet, podOverlayDefaultRouteRuleTable)
Expand Down Expand Up @@ -555,7 +561,7 @@ func (c *coordinator) makeReplyPacketViaVeth(logger *zap.Logger) error {
}

for _, family := range ipFamily {
if err := networking.AddRuleTableWithMark(markInt, c.hostRuleTable, family); err != nil && !os.IsExist(err) {
if err := networking.AddRuleTableWithMark(markInt, defaultPodRuleTable, family); err != nil && !os.IsExist(err) {
return fmt.Errorf("failed to add rule table with mark: %v", err)
}

Expand All @@ -566,7 +572,7 @@ func (c *coordinator) makeReplyPacketViaVeth(logger *zap.Logger) error {
src = c.v6PodOverlayNicAddr
}

if err := networking.AddRoute(logger, c.hostRuleTable, family, netlink.SCOPE_UNIVERSE, c.podVethName, src, nil, c.v4HijackRouteGw, c.v6HijackRouteGw); err != nil {
if err := networking.AddRoute(logger, defaultPodRuleTable, family, netlink.SCOPE_UNIVERSE, c.podVethName, src, nil, c.v4HijackRouteGw, c.v6HijackRouteGw); err != nil {
return err
}
}
Expand Down
18 changes: 0 additions & 18 deletions cmd/spiderpool-agent/cmd/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig
ctx := params.HTTPRequest.Context()
crdClient := agentContext.CRDManager.GetClient()
podClient := agentContext.PodManager
epClient := agentContext.EndpointManager
kubevirtMgr := agentContext.KubevirtManager

var coordList spiderpoolv2beta1.SpiderCoordinatorList
Expand All @@ -46,8 +45,6 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig
}

var err error
var spNics []string
var se *spiderpoolv2beta1.SpiderEndpoint

var pod *corev1.Pod
pod, err = podClient.GetPodByName(ctx, params.GetCoordinatorConfig.PodNamespace, params.GetCoordinatorConfig.PodName, constant.UseCache)
Expand All @@ -57,19 +54,11 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig

isVMPod := false
// kubevirt vm pod corresponding SpiderEndpoint uses kubevirt VM/VMI name
endpointName := params.GetCoordinatorConfig.PodName
ownerReference := metav1.GetControllerOf(pod)
if ownerReference != nil && agentContext.Cfg.EnableKubevirtStaticIP && ownerReference.APIVersion == kubevirtv1.SchemeGroupVersion.String() && ownerReference.Kind == constant.KindKubevirtVMI {
endpointName = ownerReference.Name
isVMPod = true
}

// get spiderendpoint
se, err = epClient.GetEndpointByName(ctx, params.GetCoordinatorConfig.PodNamespace, endpointName, constant.UseCache)
if err != nil && !apierrors.IsNotFound(err) {
return daemonset.NewGetCoordinatorConfigFailure().WithPayload(models.Error(fmt.Sprintf("failed to get spiderendpoint %s/%s", params.GetCoordinatorConfig.PodNamespace, params.GetCoordinatorConfig.PodName)))
}

// cancel IP conflict detection for the kubevirt vm live migration new pod
detectIPConflict := *coord.Spec.DetectIPConflict
if detectIPConflict && isVMPod {
Expand All @@ -95,12 +84,6 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig
}
}

if se != nil {
for _, spip := range se.Status.Current.IPs {
spNics = append(spNics, spip.NIC)
}
}

var prefix string
if coord.Spec.PodMACPrefix != nil {
prefix = *coord.Spec.PodMACPrefix
Expand Down Expand Up @@ -129,7 +112,6 @@ func (g *_unixGetCoordinatorConfig) Handle(params daemonset.GetCoordinatorConfig
TxQueueLen: int64(*coord.Spec.TxQueueLen),
DetectGateway: *coord.Spec.DetectGateway,
DetectIPConflict: detectIPConflict,
PodNICs: spNics,
}

if config.OverlayPodCIDR == nil {
Expand Down
Loading

0 comments on commit c1fb610

Please sign in to comment.