From aa248b0ea48b09b5eb293b93f33c94f95928bc4b Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Mon, 2 Jan 2023 23:21:07 -0800 Subject: [PATCH 01/12] teamd: Add support for custom retry counts for LACP sessions Signed-off-by: Saikrishna Arcot --- .../patch/add-support-for-custom-retry.patch | 358 ++++++++++++++++++ .../patch/block-retry-count-changes.patch | 60 +++ src/libteam/patch/series | 2 + 3 files changed, 420 insertions(+) create mode 100644 src/libteam/patch/add-support-for-custom-retry.patch create mode 100644 src/libteam/patch/block-retry-count-changes.patch diff --git a/src/libteam/patch/add-support-for-custom-retry.patch b/src/libteam/patch/add-support-for-custom-retry.patch new file mode 100644 index 000000000000..6a24ce1a5151 --- /dev/null +++ b/src/libteam/patch/add-support-for-custom-retry.patch @@ -0,0 +1,358 @@ +Add support for custom retry counts for LACP sessions + +From: Saikrishna Arcot +Date: 2022-12-21 18:11:31 -0800 + +Add support for using custom retry count (instead of the default of 3) for LACP +sessions, to allow for sessions to stay up for more than 90 seconds. +--- + teamd/teamd_runner_lacp.c | 212 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 198 insertions(+), 14 deletions(-) + +diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c +index 82b8b86..f58ad3b 100644 +--- a/teamd/teamd_runner_lacp.c ++++ b/teamd/teamd_runner_lacp.c +@@ -77,22 +77,45 @@ struct lacpdu { + uint8_t collector_info_len; + uint16_t collector_max_delay; + uint8_t __reserved3[12]; +- uint8_t terminator_tlv_type; +- uint8_t terminator_info_len; +- uint8_t __reserved4[50]; ++ union { ++ struct { ++ uint8_t terminator_tlv_type; ++ uint8_t terminator_info_len; ++ uint8_t __reserved4[8]; ++ } __attribute__((__packed__)) v1; ++ struct { ++ uint8_t actor_retry_tlv_type; ++ uint8_t actor_retry_tlv_len; ++ uint8_t actor_retry_count; ++ uint8_t __reserved_a[1]; ++ uint8_t partner_retry_tlv_type; ++ uint8_t partner_retry_tlv_len; ++ uint8_t partner_retry_count; ++ uint8_t __reserved_b[1]; ++ uint8_t terminator_tlv_type; ++ uint8_t terminator_info_len; ++ } __attribute__((__packed__)) v2; ++ } __attribute__((__packed__)); ++ uint8_t __reserved5[42]; + } __attribute__((__packed__)); + +-static void lacpdu_init(struct lacpdu *lacpdu) ++static void lacpdu_init(struct lacpdu *lacpdu, uint8_t version) + { + memset(lacpdu, 0, sizeof(*lacpdu)); + lacpdu->subtype = 0x01; +- lacpdu->version_number = 0x01; ++ lacpdu->version_number = version; + lacpdu->actor_tlv_type = 0x01; + lacpdu->actor_info_len = 0x14; + lacpdu->partner_tlv_type = 0x02; + lacpdu->partner_info_len = 0x14; + lacpdu->collector_tlv_type = 0x03; + lacpdu->collector_info_len = 0x10; ++ if (version == 0xf1) { ++ lacpdu->v2.actor_retry_tlv_type = 0x80; ++ lacpdu->v2.actor_retry_tlv_len = 0x04; ++ lacpdu->v2.partner_retry_tlv_type = 0x81; ++ lacpdu->v2.partner_retry_tlv_len = 0x04; ++ } + } + + static bool lacpdu_check(struct lacpdu *lacpdu) +@@ -100,14 +123,31 @@ static bool lacpdu_check(struct lacpdu *lacpdu) + /* + * According to 43.4.12 version_number, tlv_type and reserved fields + * should not be checked. ++ * ++ * However, as part of 802.1ax, the version number is used to indicate ++ * whether there may be additional TLVs present or not, so it does ++ * need to be checked. + */ + +- if (lacpdu->subtype != 0x01 || +- lacpdu->actor_info_len != 0x14 || +- lacpdu->partner_info_len != 0x14 || +- lacpdu->collector_info_len != 0x10 || +- lacpdu->terminator_info_len != 0x00) ++ if (lacpdu->subtype != 0x01) ++ return false; ++ if (lacpdu->version_number == 0x01) { ++ if (lacpdu->actor_info_len != 0x14 || ++ lacpdu->partner_info_len != 0x14 || ++ lacpdu->collector_info_len != 0x10 || ++ lacpdu->v1.terminator_info_len != 0x00) ++ return false; ++ } else if (lacpdu->version_number == 0xf1) { ++ if (lacpdu->actor_info_len != 0x14 || ++ lacpdu->partner_info_len != 0x14 || ++ lacpdu->collector_info_len != 0x10 || ++ lacpdu->v2.actor_retry_tlv_len != 0x04 || ++ lacpdu->v2.partner_retry_tlv_len != 0x04 || ++ lacpdu->v2.terminator_info_len != 0x00) ++ return false; ++ } else { + return false; ++ } + return true; + } + +@@ -154,6 +194,8 @@ struct lacp { + #define LACP_CFG_DFLT_MIN_PORTS_MAX 1024 + enum lacp_agg_select_policy agg_select_policy; + #define LACP_CFG_DFLT_AGG_SELECT_POLICY LACP_AGG_SELECT_LACP_PRIO ++ uint8_t retry_count; ++#define LACP_CFG_DFLT_RETRY_COUNT 3 + } cfg; + struct { + bool carrier_up; +@@ -185,6 +227,7 @@ struct lacp_port { + struct lacpdu_info actor; + struct lacpdu_info partner; + struct lacpdu_info __partner_last; /* last state before update */ ++ int partner_retry_count; + bool periodic_on; + struct lacp_port *agg_lead; /* leading port of aggregator. + * NULL in case this port is not selected */ +@@ -1110,6 +1153,7 @@ static int slow_addr_del(struct lacp_port *lacp_port) + #define LACP_SOCKET_CB_NAME "lacp_socket" + #define LACP_PERIODIC_CB_NAME "lacp_periodic" + #define LACP_TIMEOUT_CB_NAME "lacp_timeout" ++#define LACP_RETRY_COUNT_TIMEOUT_CB_NAME "lacp_retry_count_timeout" + + static int lacp_port_timeout_set(struct lacp_port *lacp_port, bool fast_forced) + { +@@ -1119,7 +1163,7 @@ static int lacp_port_timeout_set(struct lacp_port *lacp_port, bool fast_forced) + + ms = fast_forced || lacp_port->lacp->cfg.fast_rate ? + LACP_PERIODIC_SHORT: LACP_PERIODIC_LONG; +- ms *= LACP_PERIODIC_MUL; ++ ms *= lacp_port->partner_retry_count; + ms_to_timespec(&ts, ms); + err = teamd_loop_callback_timer_set(lacp_port->ctx, + LACP_TIMEOUT_CB_NAME, +@@ -1286,6 +1330,9 @@ static int lacp_port_set_state(struct lacp_port *lacp_port, + err = lacp_port_partner_update(lacp_port); + if (err) + return err; ++ lacp_port->lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ teamd_loop_callback_disable(lacp_port->ctx, ++ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp_port->lacp); + lacp_port_timeout_set(lacp_port, true); + teamd_loop_callback_enable(lacp_port->ctx, + LACP_TIMEOUT_CB_NAME, lacp_port); +@@ -1392,12 +1439,19 @@ static int lacpdu_send(struct lacp_port *lacp_port) + if (hwaddr_len != ETH_ALEN) + return 0; + +- lacpdu_init(&lacpdu); ++ if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT || lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) ++ lacpdu_init(&lacpdu, 0xf1); ++ else ++ lacpdu_init(&lacpdu, 0x01); + lacpdu.actor = lacp_port->actor; + lacpdu.partner = lacp_port->partner; + memcpy(lacpdu.hdr.ether_shost, hwaddr, hwaddr_len); + memcpy(lacpdu.hdr.ether_dhost, ll_slow.sll_addr, ll_slow.sll_halen); + lacpdu.hdr.ether_type = htons(ETH_P_SLOW); ++ if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT || lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { ++ lacpdu.v2.actor_retry_count = lacp_port->lacp->cfg.retry_count; ++ lacpdu.v2.partner_retry_count = lacp_port->partner_retry_count; ++ } + + err = teamd_send(lacp_port->sock, &lacpdu, sizeof(lacpdu), 0); + return err; +@@ -1428,6 +1482,26 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) + return err; + } + ++ if (lacpdu->version_number == 0xf1) { ++ if (lacpdu->v2.actor_retry_count < 3) { ++ teamd_log_err("%s: retry count from partner (%u) out of its limits.", lacp_port->tdport->ifname, lacpdu->v2.actor_retry_count); ++ return -EINVAL; ++ } ++ if (lacp_port->partner_retry_count != lacpdu->v2.actor_retry_count) ++ teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", ++ lacp_port->tdport->ifname, ++ lacp_port->partner_retry_count, ++ lacpdu->v2.actor_retry_count); ++ lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; ++ } else { ++ if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) ++ teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", ++ lacp_port->tdport->ifname, ++ lacp_port->partner_retry_count, ++ LACP_CFG_DFLT_RETRY_COUNT); ++ lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ } ++ + err = lacp_port_set_state(lacp_port, PORT_STATE_CURRENT); + if (err) + return err; +@@ -1435,8 +1509,9 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) + lacp_port_actor_update(lacp_port); + + /* Check if the other side has correct info about us */ +- if (memcmp(&lacpdu->partner, &lacp_port->actor, +- sizeof(struct lacpdu_info))) { ++ if (memcmp(&lacpdu->partner, &lacp_port->actor, sizeof(struct lacpdu_info)) ++ || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count) ++ || (lacpdu->version_number != 0xf1 && lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT)) { + err = lacpdu_send(lacp_port); + if (err) + return err; +@@ -1506,6 +1581,19 @@ static int lacp_callback_timeout(struct teamd_context *ctx, int events, + return err; + } + ++static int lacp_callback_retry_count_timeout(struct teamd_context *ctx, int events, ++ void *priv) ++{ ++ struct lacp *lacp = priv; ++ ++ teamd_log_dbg(ctx, "Retry count being reset to %u", ++ LACP_CFG_DFLT_RETRY_COUNT); ++ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ teamd_loop_callback_disable(ctx, ++ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp); ++ return 0; ++} ++ + static int lacp_callback_periodic(struct teamd_context *ctx, int events, + void *priv) + { +@@ -1595,6 +1683,7 @@ static int lacp_port_added(struct teamd_context *ctx, + lacp_port->ctx = ctx; + lacp_port->tdport = tdport; + lacp_port->lacp = lacp; ++ lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; + + err = lacp_port_load_config(ctx, lacp_port); + if (err) { +@@ -1947,6 +2036,88 @@ static int lacp_state_select_policy_get(struct teamd_context *ctx, + return 0; + } + ++static int lacp_state_retry_count_get(struct teamd_context *ctx, ++ struct team_state_gsc *gsc, ++ void *priv) ++{ ++ struct lacp *lacp = priv; ++ ++ gsc->data.int_val = lacp->cfg.retry_count; ++ return 0; ++} ++ ++struct lacp_state_retry_count_info { ++ struct teamd_workq workq; ++ struct lacp *lacp; ++ uint8_t retry_count; ++}; ++ ++static int lacp_state_retry_count_work(struct teamd_context *ctx, ++ struct teamd_workq *workq) ++{ ++ struct lacp_state_retry_count_info *info; ++ struct lacp *lacp; ++ struct teamd_port *tdport; ++ int ms; ++ struct timespec ts; ++ int err; ++ ++ info = get_container(workq, struct lacp_state_retry_count_info, workq); ++ lacp = info->lacp; ++ if (info->retry_count == lacp->cfg.retry_count) ++ return 0; ++ teamd_log_dbg(ctx, "Retry count manually changed from %u to %u", ++ lacp->cfg.retry_count, ++ info->retry_count); ++ lacp->cfg.retry_count = info->retry_count; ++ ms = lacp->cfg.retry_count * 3 * 60 * 1000; ++ ms_to_timespec(&ts, ms); ++ err = teamd_loop_callback_timer_set(lacp->ctx, ++ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, ++ lacp, NULL, &ts); ++ if (err) { ++ teamd_log_err("Failed to set retry count timeout timer."); ++ // Switch back to default now ++ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ return err; ++ } ++ teamd_loop_callback_enable(ctx, ++ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp); ++ ++ teamd_for_each_tdport(tdport, lacp->ctx) { ++ struct lacp_port* lacp_port; ++ ++ lacp_port = lacp_port_get(lacp, tdport); ++ if (lacp_port_selected(lacp_port)) { ++ teamd_log_dbg(ctx, "%s: Notifying partner of updated retry count", ++ lacp_port->tdport->ifname); ++ lacpdu_send(lacp_port); ++ } ++ } ++ return 0; ++} ++ ++static int lacp_state_retry_count_set(struct teamd_context *ctx, ++ struct team_state_gsc *gsc, ++ void *priv) ++{ ++ struct lacp_state_retry_count_info *info; ++ struct lacp *lacp = priv; ++ ++ if (!gsc->data.int_val) ++ return -EOPNOTSUPP; ++ if (gsc->data.int_val < 3 || gsc->data.int_val > UCHAR_MAX) ++ return -EINVAL; ++ info = malloc(sizeof(*info)); ++ if (!info) ++ return -ENOMEM; ++ teamd_workq_init_work(&info->workq, lacp_state_retry_count_work); ++ info->lacp = lacp; ++ info->retry_count = gsc->data.int_val; ++ teamd_workq_schedule_work(ctx, &info->workq); ++ return 0; ++} ++ + static const struct teamd_state_val lacp_state_vals[] = { + { + .subpath = "active", +@@ -1973,6 +2144,12 @@ static const struct teamd_state_val lacp_state_vals[] = { + .type = TEAMD_STATE_ITEM_TYPE_STRING, + .getter = lacp_state_select_policy_get, + }, ++ { ++ .subpath = "retry_count", ++ .type = TEAMD_STATE_ITEM_TYPE_INT, ++ .getter = lacp_state_retry_count_get, ++ .setter = lacp_state_retry_count_set, ++ }, + }; + + static struct lacp_port *lacp_port_gsc(struct team_state_gsc *gsc, +@@ -2380,6 +2557,12 @@ static int lacp_init(struct teamd_context *ctx, void *priv) + teamd_log_err("Failed to register state groups."); + goto balancer_fini; + } ++ err = teamd_loop_callback_timer_add(ctx, LACP_RETRY_COUNT_TIMEOUT_CB_NAME, ++ lacp, lacp_callback_retry_count_timeout); ++ if (err) { ++ teamd_log_err("Failed to add retry count timeout callback timer"); ++ goto balancer_fini; ++ } + return 0; + + balancer_fini: +@@ -2395,6 +2578,7 @@ static void lacp_fini(struct teamd_context *ctx, void *priv) + + if (ctx->lacp_directory) + lacp_state_save(ctx, lacp); ++ teamd_loop_callback_del(ctx, LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp); + teamd_state_val_unregister(ctx, &lacp_state_vg, lacp); + teamd_balancer_fini(lacp->tb); + teamd_event_watch_unregister(ctx, &lacp_event_watch_ops, lacp); diff --git a/src/libteam/patch/block-retry-count-changes.patch b/src/libteam/patch/block-retry-count-changes.patch new file mode 100644 index 000000000000..ef2b577e8485 --- /dev/null +++ b/src/libteam/patch/block-retry-count-changes.patch @@ -0,0 +1,60 @@ +Don't reset the retry count after setting it for 60 seconds + +From: Saikrishna Arcot +Date: 2023-01-18 14:26:36 -0800 + +After setting the retry count to some custom value, if a normal LACP +packet comes in without a custom retry count, don't reset it back to +that custom retry count for the first 60 seconds. +--- + teamd/teamd_runner_lacp.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c +index f58ad3b..7df3bcb 100644 +--- a/teamd/teamd_runner_lacp.c ++++ b/teamd/teamd_runner_lacp.c +@@ -180,6 +180,7 @@ struct lacp { + struct lacp_port *selected_agg_lead; /* leading port of selected aggregator */ + bool carrier_up; + time_t warm_start_mode_timer; ++ time_t next_retry_count_change_time; + struct { + bool active; + #define LACP_CFG_DFLT_ACTIVE true +@@ -1457,6 +1458,8 @@ static int lacpdu_send(struct lacp_port *lacp_port) + return err; + } + ++#define LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT 60 ++ + static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) + { + int err; +@@ -1493,13 +1496,20 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) + lacp_port->partner_retry_count, + lacpdu->v2.actor_retry_count); + lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; ++ lacp_port->lacp->next_retry_count_change_time = time(NULL) + LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT; + } else { +- if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) +- teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", +- lacp_port->tdport->ifname, +- lacp_port->partner_retry_count, +- LACP_CFG_DFLT_RETRY_COUNT); +- lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ time_t currentTime = time(NULL); ++ if (currentTime < lacp_port->lacp->next_retry_count_change_time) { ++ teamd_log_dbg(lacp_port->ctx, "%s: retry count changed too recently", ++ lacp_port->tdport->ifname); ++ } else { ++ if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) ++ teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", ++ lacp_port->tdport->ifname, ++ lacp_port->partner_retry_count, ++ LACP_CFG_DFLT_RETRY_COUNT); ++ lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ } + } + + err = lacp_port_set_state(lacp_port, PORT_STATE_CURRENT); diff --git a/src/libteam/patch/series b/src/libteam/patch/series index cd7522918f43..e8505a64f7d6 100644 --- a/src/libteam/patch/series +++ b/src/libteam/patch/series @@ -10,3 +10,5 @@ 0010-When-read-of-timerfd-returned-0-don-t-consider-this-.patch 0011-Remove-extensive-debug-output.patch 0012-Increase-min_ports-upper-limit-to-1024.patch +add-support-for-custom-retry.patch +block-retry-count-changes.patch From b9daabbdd71c835286bd15f5d08010d39483f391 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 2 Feb 2023 12:28:48 -0800 Subject: [PATCH 02/12] Add _SECONDS to #define name to make it clear this is in seconds Signed-off-by: Saikrishna Arcot --- src/libteam/patch/block-retry-count-changes.patch | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libteam/patch/block-retry-count-changes.patch b/src/libteam/patch/block-retry-count-changes.patch index ef2b577e8485..57bd4ac9dad3 100644 --- a/src/libteam/patch/block-retry-count-changes.patch +++ b/src/libteam/patch/block-retry-count-changes.patch @@ -26,7 +26,7 @@ index f58ad3b..7df3bcb 100644 return err; } -+#define LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT 60 ++#define LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT_SECONDS 60 + static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) { @@ -35,7 +35,7 @@ index f58ad3b..7df3bcb 100644 lacp_port->partner_retry_count, lacpdu->v2.actor_retry_count); lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; -+ lacp_port->lacp->next_retry_count_change_time = time(NULL) + LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT; ++ lacp_port->lacp->next_retry_count_change_time = time(NULL) + LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT_SECONDS; } else { - if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) - teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", From ba9325cfee5154a12f1c2f2b9d802bfca893d6cd Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 2 Feb 2023 12:34:32 -0800 Subject: [PATCH 03/12] Fix mix of spaces vs tabs Signed-off-by: Saikrishna Arcot --- src/libteam/patch/add-support-for-custom-retry.patch | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libteam/patch/add-support-for-custom-retry.patch b/src/libteam/patch/add-support-for-custom-retry.patch index 6a24ce1a5151..019e904f0469 100644 --- a/src/libteam/patch/add-support-for-custom-retry.patch +++ b/src/libteam/patch/add-support-for-custom-retry.patch @@ -250,7 +250,7 @@ index 82b8b86..f58ad3b 100644 +struct lacp_state_retry_count_info { + struct teamd_workq workq; + struct lacp *lacp; -+ uint8_t retry_count; ++ uint8_t retry_count; +}; + +static int lacp_state_retry_count_work(struct teamd_context *ctx, @@ -270,7 +270,7 @@ index 82b8b86..f58ad3b 100644 + teamd_log_dbg(ctx, "Retry count manually changed from %u to %u", + lacp->cfg.retry_count, + info->retry_count); -+ lacp->cfg.retry_count = info->retry_count; ++ lacp->cfg.retry_count = info->retry_count; + ms = lacp->cfg.retry_count * 3 * 60 * 1000; + ms_to_timespec(&ts, ms); + err = teamd_loop_callback_timer_set(lacp->ctx, @@ -278,15 +278,15 @@ index 82b8b86..f58ad3b 100644 + lacp, NULL, &ts); + if (err) { + teamd_log_err("Failed to set retry count timeout timer."); -+ // Switch back to default now -+ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ // Switch back to default now ++ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; + return err; + } + teamd_loop_callback_enable(ctx, + LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp); + + teamd_for_each_tdport(tdport, lacp->ctx) { -+ struct lacp_port* lacp_port; ++ struct lacp_port* lacp_port; + + lacp_port = lacp_port_get(lacp, tdport); + if (lacp_port_selected(lacp_port)) { From 4d7730fc284c530fed3b87396e3e08844e41dece Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 2 Feb 2023 14:04:31 -0800 Subject: [PATCH 04/12] Rename patches to include patch order number Signed-off-by: Saikrishna Arcot --- ...om-retry.patch => 0013-add-support-for-custom-retry.patch} | 0 ...unt-changes.patch => 0014-block-retry-count-changes.patch} | 0 src/libteam/patch/series | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/libteam/patch/{add-support-for-custom-retry.patch => 0013-add-support-for-custom-retry.patch} (100%) rename src/libteam/patch/{block-retry-count-changes.patch => 0014-block-retry-count-changes.patch} (100%) diff --git a/src/libteam/patch/add-support-for-custom-retry.patch b/src/libteam/patch/0013-add-support-for-custom-retry.patch similarity index 100% rename from src/libteam/patch/add-support-for-custom-retry.patch rename to src/libteam/patch/0013-add-support-for-custom-retry.patch diff --git a/src/libteam/patch/block-retry-count-changes.patch b/src/libteam/patch/0014-block-retry-count-changes.patch similarity index 100% rename from src/libteam/patch/block-retry-count-changes.patch rename to src/libteam/patch/0014-block-retry-count-changes.patch diff --git a/src/libteam/patch/series b/src/libteam/patch/series index e8505a64f7d6..c3dd1dffbc89 100644 --- a/src/libteam/patch/series +++ b/src/libteam/patch/series @@ -10,5 +10,5 @@ 0010-When-read-of-timerfd-returned-0-don-t-consider-this-.patch 0011-Remove-extensive-debug-output.patch 0012-Increase-min_ports-upper-limit-to-1024.patch -add-support-for-custom-retry.patch -block-retry-count-changes.patch +0013-add-support-for-custom-retry.patch +0014-block-retry-count-changes.patch From 304a4440e6869a75c5785432732a7203c662b8b0 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 16 Feb 2023 14:59:52 -0800 Subject: [PATCH 05/12] Use monotonic time instead of wall-clock time Also fix a bug with `next_retry_count_change_time` getting calculated repeatedly when a non-standard retry count is sent. Signed-off-by: Saikrishna Arcot --- .../0013-add-support-for-custom-retry.patch | 44 ++++++------ .../0014-block-retry-count-changes.patch | 68 ++++++++++++------- 2 files changed, 68 insertions(+), 44 deletions(-) diff --git a/src/libteam/patch/0013-add-support-for-custom-retry.patch b/src/libteam/patch/0013-add-support-for-custom-retry.patch index 019e904f0469..effa73bb9a88 100644 --- a/src/libteam/patch/0013-add-support-for-custom-retry.patch +++ b/src/libteam/patch/0013-add-support-for-custom-retry.patch @@ -6,11 +6,11 @@ Date: 2022-12-21 18:11:31 -0800 Add support for using custom retry count (instead of the default of 3) for LACP sessions, to allow for sessions to stay up for more than 90 seconds. --- - teamd/teamd_runner_lacp.c | 212 ++++++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 198 insertions(+), 14 deletions(-) + teamd/teamd_runner_lacp.c | 214 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 200 insertions(+), 14 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index 82b8b86..f58ad3b 100644 +index 82b8b86..4b77692 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -77,22 +77,45 @@ struct lacpdu { @@ -166,7 +166,7 @@ index 82b8b86..f58ad3b 100644 err = teamd_send(lacp_port->sock, &lacpdu, sizeof(lacpdu), 0); return err; -@@ -1428,6 +1482,26 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1428,6 +1482,28 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) return err; } @@ -175,25 +175,27 @@ index 82b8b86..f58ad3b 100644 + teamd_log_err("%s: retry count from partner (%u) out of its limits.", lacp_port->tdport->ifname, lacpdu->v2.actor_retry_count); + return -EINVAL; + } -+ if (lacp_port->partner_retry_count != lacpdu->v2.actor_retry_count) ++ if (lacp_port->partner_retry_count != lacpdu->v2.actor_retry_count) { + teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", -+ lacp_port->tdport->ifname, -+ lacp_port->partner_retry_count, -+ lacpdu->v2.actor_retry_count); -+ lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; ++ lacp_port->tdport->ifname, ++ lacp_port->partner_retry_count, ++ lacpdu->v2.actor_retry_count); ++ lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; ++ } + } else { -+ if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) ++ if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { + teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", -+ lacp_port->tdport->ifname, -+ lacp_port->partner_retry_count, -+ LACP_CFG_DFLT_RETRY_COUNT); -+ lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ lacp_port->tdport->ifname, ++ lacp_port->partner_retry_count, ++ LACP_CFG_DFLT_RETRY_COUNT); ++ lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ } + } + err = lacp_port_set_state(lacp_port, PORT_STATE_CURRENT); if (err) return err; -@@ -1435,8 +1509,9 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1435,8 +1511,9 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) lacp_port_actor_update(lacp_port); /* Check if the other side has correct info about us */ @@ -205,7 +207,7 @@ index 82b8b86..f58ad3b 100644 err = lacpdu_send(lacp_port); if (err) return err; -@@ -1506,6 +1581,19 @@ static int lacp_callback_timeout(struct teamd_context *ctx, int events, +@@ -1506,6 +1583,19 @@ static int lacp_callback_timeout(struct teamd_context *ctx, int events, return err; } @@ -225,7 +227,7 @@ index 82b8b86..f58ad3b 100644 static int lacp_callback_periodic(struct teamd_context *ctx, int events, void *priv) { -@@ -1595,6 +1683,7 @@ static int lacp_port_added(struct teamd_context *ctx, +@@ -1595,6 +1685,7 @@ static int lacp_port_added(struct teamd_context *ctx, lacp_port->ctx = ctx; lacp_port->tdport = tdport; lacp_port->lacp = lacp; @@ -233,7 +235,7 @@ index 82b8b86..f58ad3b 100644 err = lacp_port_load_config(ctx, lacp_port); if (err) { -@@ -1947,6 +2036,88 @@ static int lacp_state_select_policy_get(struct teamd_context *ctx, +@@ -1947,6 +2038,88 @@ static int lacp_state_select_policy_get(struct teamd_context *ctx, return 0; } @@ -322,7 +324,7 @@ index 82b8b86..f58ad3b 100644 static const struct teamd_state_val lacp_state_vals[] = { { .subpath = "active", -@@ -1973,6 +2144,12 @@ static const struct teamd_state_val lacp_state_vals[] = { +@@ -1973,6 +2146,12 @@ static const struct teamd_state_val lacp_state_vals[] = { .type = TEAMD_STATE_ITEM_TYPE_STRING, .getter = lacp_state_select_policy_get, }, @@ -335,7 +337,7 @@ index 82b8b86..f58ad3b 100644 }; static struct lacp_port *lacp_port_gsc(struct team_state_gsc *gsc, -@@ -2380,6 +2557,12 @@ static int lacp_init(struct teamd_context *ctx, void *priv) +@@ -2380,6 +2559,12 @@ static int lacp_init(struct teamd_context *ctx, void *priv) teamd_log_err("Failed to register state groups."); goto balancer_fini; } @@ -348,7 +350,7 @@ index 82b8b86..f58ad3b 100644 return 0; balancer_fini: -@@ -2395,6 +2578,7 @@ static void lacp_fini(struct teamd_context *ctx, void *priv) +@@ -2395,6 +2580,7 @@ static void lacp_fini(struct teamd_context *ctx, void *priv) if (ctx->lacp_directory) lacp_state_save(ctx, lacp); diff --git a/src/libteam/patch/0014-block-retry-count-changes.patch b/src/libteam/patch/0014-block-retry-count-changes.patch index 57bd4ac9dad3..5fccf3c9732f 100644 --- a/src/libteam/patch/0014-block-retry-count-changes.patch +++ b/src/libteam/patch/0014-block-retry-count-changes.patch @@ -7,11 +7,11 @@ After setting the retry count to some custom value, if a normal LACP packet comes in without a custom retry count, don't reset it back to that custom retry count for the first 60 seconds. --- - teamd/teamd_runner_lacp.c | 22 ++++++++++++++++------ - 1 file changed, 16 insertions(+), 6 deletions(-) + teamd/teamd_runner_lacp.c | 40 ++++++++++++++++++++++++++++++---------- + 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index f58ad3b..7df3bcb 100644 +index 4b77692..5a55bd5 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -180,6 +180,7 @@ struct lacp { @@ -22,7 +22,7 @@ index f58ad3b..7df3bcb 100644 struct { bool active; #define LACP_CFG_DFLT_ACTIVE true -@@ -1457,6 +1458,8 @@ static int lacpdu_send(struct lacp_port *lacp_port) +@@ -1457,9 +1458,12 @@ static int lacpdu_send(struct lacp_port *lacp_port) return err; } @@ -31,30 +31,52 @@ index f58ad3b..7df3bcb 100644 static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) { int err; -@@ -1493,13 +1496,20 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) - lacp_port->partner_retry_count, - lacpdu->v2.actor_retry_count); - lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; -+ lacp_port->lacp->next_retry_count_change_time = time(NULL) + LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT_SECONDS; ++ struct timespec monotonic_time = {0}; + + if (!lacpdu_check(lacpdu)) { + teamd_log_warn("malformed LACP PDU came."); +@@ -1489,18 +1493,34 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) + } + if (lacp_port->partner_retry_count != lacpdu->v2.actor_retry_count) { + teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", +- lacp_port->tdport->ifname, +- lacp_port->partner_retry_count, +- lacpdu->v2.actor_retry_count); +- lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; +- } ++ lacp_port->tdport->ifname, ++ lacp_port->partner_retry_count, ++ lacpdu->v2.actor_retry_count); ++ lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; ++ if (clock_gettime(CLOCK_MONOTONIC, &monotonic_time)) { ++ err = errno; ++ teamd_log_err("%s: unable to get current time: %s", lacp_port->tdport->ifname, strerror(err)); ++ return -err; ++ } ++ lacp_port->lacp->next_retry_count_change_time = monotonic_time.tv_sec + LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT_SECONDS; ++ } } else { -- if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) + if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { - teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", -- lacp_port->tdport->ifname, -- lacp_port->partner_retry_count, -- LACP_CFG_DFLT_RETRY_COUNT); -- lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; -+ time_t currentTime = time(NULL); -+ if (currentTime < lacp_port->lacp->next_retry_count_change_time) { -+ teamd_log_dbg(lacp_port->ctx, "%s: retry count changed too recently", -+ lacp_port->tdport->ifname); -+ } else { -+ if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) +- lacp_port->tdport->ifname, +- lacp_port->partner_retry_count, +- LACP_CFG_DFLT_RETRY_COUNT); +- lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ if (clock_gettime(CLOCK_MONOTONIC, &monotonic_time)) { ++ err = errno; ++ teamd_log_err("%s: unable to get current time: %s", lacp_port->tdport->ifname, strerror(err)); ++ return -err; ++ } ++ if (monotonic_time.tv_sec < lacp_port->lacp->next_retry_count_change_time) { ++ teamd_log_dbg(lacp_port->ctx, "%s: retry count changed too recently", ++ lacp_port->tdport->ifname); ++ } else { + teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", + lacp_port->tdport->ifname, + lacp_port->partner_retry_count, + LACP_CFG_DFLT_RETRY_COUNT); -+ lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; -+ } ++ lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ } + } } - err = lacp_port_set_state(lacp_port, PORT_STATE_CURRENT); From ec258edb952879d9000026402b8ea92881199a43 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 16 Feb 2023 16:44:42 -0800 Subject: [PATCH 06/12] Replace magic number of 3 with existing define Signed-off-by: Saikrishna Arcot --- src/libteam/patch/0013-add-support-for-custom-retry.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libteam/patch/0013-add-support-for-custom-retry.patch b/src/libteam/patch/0013-add-support-for-custom-retry.patch index effa73bb9a88..73068363777e 100644 --- a/src/libteam/patch/0013-add-support-for-custom-retry.patch +++ b/src/libteam/patch/0013-add-support-for-custom-retry.patch @@ -171,7 +171,7 @@ index 82b8b86..4b77692 100644 } + if (lacpdu->version_number == 0xf1) { -+ if (lacpdu->v2.actor_retry_count < 3) { ++ if (lacpdu->v2.actor_retry_count < LACP_CFG_DFLT_RETRY_COUNT) { + teamd_log_err("%s: retry count from partner (%u) out of its limits.", lacp_port->tdport->ifname, lacpdu->v2.actor_retry_count); + return -EINVAL; + } From 27cdefbc3d1c2e2ee7bc10502129f1049378022c Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Wed, 19 Apr 2023 17:43:53 -0700 Subject: [PATCH 07/12] Add support for showing the partner's retry count Also fix a logic error when filling in the retry count fields in the LACP PDU packet. Signed-off-by: Saikrishna Arcot --- .../0015-add-support-for-custom-retry.patch | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/libteam/patch/0015-add-support-for-custom-retry.patch b/src/libteam/patch/0015-add-support-for-custom-retry.patch index 73068363777e..c01114ecc05a 100644 --- a/src/libteam/patch/0015-add-support-for-custom-retry.patch +++ b/src/libteam/patch/0015-add-support-for-custom-retry.patch @@ -6,11 +6,11 @@ Date: 2022-12-21 18:11:31 -0800 Add support for using custom retry count (instead of the default of 3) for LACP sessions, to allow for sessions to stay up for more than 90 seconds. --- - teamd/teamd_runner_lacp.c | 214 ++++++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 200 insertions(+), 14 deletions(-) + teamd/teamd_runner_lacp.c | 227 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 213 insertions(+), 14 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index 82b8b86..4b77692 100644 +index 82b8b86..5c7e7b4 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -77,22 +77,45 @@ struct lacpdu { @@ -159,7 +159,7 @@ index 82b8b86..4b77692 100644 memcpy(lacpdu.hdr.ether_shost, hwaddr, hwaddr_len); memcpy(lacpdu.hdr.ether_dhost, ll_slow.sll_addr, ll_slow.sll_halen); lacpdu.hdr.ether_type = htons(ETH_P_SLOW); -+ if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT || lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { ++ if (lacpdu.version_number == 0xf1) { + lacpdu.v2.actor_retry_count = lacp_port->lacp->cfg.retry_count; + lacpdu.v2.partner_retry_count = lacp_port->partner_retry_count; + } @@ -337,7 +337,34 @@ index 82b8b86..4b77692 100644 }; static struct lacp_port *lacp_port_gsc(struct team_state_gsc *gsc, -@@ -2380,6 +2559,12 @@ static int lacp_init(struct teamd_context *ctx, void *priv) +@@ -2272,6 +2451,14 @@ static int lacp_port_state_prio_get(struct teamd_context *ctx, + return 0; + } + ++static int lacp_port_partner_retry_count_get(struct teamd_context *ctx, ++ struct team_state_gsc *gsc, ++ void *priv) ++{ ++ gsc->data.int_val = lacp_port_gsc(gsc, priv)->partner_retry_count; ++ return 0; ++} ++ + static const struct teamd_state_val lacp_port_state_vals[] = { + { + .subpath = "selected", +@@ -2314,6 +2501,11 @@ static const struct teamd_state_val lacp_port_state_vals[] = { + .vals = lacp_port_partner_state_vals, + .vals_count = ARRAY_SIZE(lacp_port_partner_state_vals), + }, ++ { ++ .subpath = "partner_retry_count", ++ .type = TEAMD_STATE_ITEM_TYPE_INT, ++ .getter = lacp_port_partner_retry_count_get, ++ }, + }; + + static const struct teamd_state_val lacp_state_vgs[] = { +@@ -2380,6 +2572,12 @@ static int lacp_init(struct teamd_context *ctx, void *priv) teamd_log_err("Failed to register state groups."); goto balancer_fini; } @@ -350,7 +377,7 @@ index 82b8b86..4b77692 100644 return 0; balancer_fini: -@@ -2395,6 +2580,7 @@ static void lacp_fini(struct teamd_context *ctx, void *priv) +@@ -2395,6 +2593,7 @@ static void lacp_fini(struct teamd_context *ctx, void *priv) if (ctx->lacp_directory) lacp_state_save(ctx, lacp); From 51b1191b946bf7eb0f416cc9354fd0ee66b89000 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Wed, 19 Apr 2023 18:00:00 -0700 Subject: [PATCH 08/12] Make changes to the transition mechanism First, reply with the most recently received LACP PDU version. This is so that if one side is sending both the old and new versions, then the peer side will send both versions to keep both listeners happy. This also allows for using the new version packets even when the retry count is 3 on both sides. This can only be triggered remotely; there's no local command to force a particular version to be used. Second, reset the 60-second timer preventing the retry count from being reset back to 3 every time a new version PDU is received. This keeps that setting persistent for longer with the downside of a delayed response to the retry count being reset to 3. Signed-off-by: Saikrishna Arcot --- .../0016-block-retry-count-changes.patch | 86 +++++++++++++------ 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/src/libteam/patch/0016-block-retry-count-changes.patch b/src/libteam/patch/0016-block-retry-count-changes.patch index 5fccf3c9732f..083ea9b2f86c 100644 --- a/src/libteam/patch/0016-block-retry-count-changes.patch +++ b/src/libteam/patch/0016-block-retry-count-changes.patch @@ -5,13 +5,13 @@ Date: 2023-01-18 14:26:36 -0800 After setting the retry count to some custom value, if a normal LACP packet comes in without a custom retry count, don't reset it back to -that custom retry count for the first 60 seconds. +that custom retry count for 60 seconds since the last new packet. --- - teamd/teamd_runner_lacp.c | 40 ++++++++++++++++++++++++++++++---------- - 1 file changed, 30 insertions(+), 10 deletions(-) + teamd/teamd_runner_lacp.c | 44 +++++++++++++++++++++++++++++++++++--------- + 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index 4b77692..5a55bd5 100644 +index 5c7e7b4..4f971d9 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -180,6 +180,7 @@ struct lacp { @@ -22,7 +22,36 @@ index 4b77692..5a55bd5 100644 struct { bool active; #define LACP_CFG_DFLT_ACTIVE true -@@ -1457,9 +1458,12 @@ static int lacpdu_send(struct lacp_port *lacp_port) +@@ -232,6 +233,7 @@ struct lacp_port { + struct lacp_port *agg_lead; /* leading port of aggregator. + * NULL in case this port is not selected */ + enum lacp_port_state state; ++ int last_received_lacpdu_version; + bool lacpdu_saved; + struct lacpdu last_pdu; + struct { +@@ -1331,6 +1333,7 @@ static int lacp_port_set_state(struct lacp_port *lacp_port, + if (err) + return err; + lacp_port->lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ lacp_port->last_received_lacpdu_version = 0x01; + teamd_loop_callback_disable(lacp_port->ctx, + LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp_port->lacp); + lacp_port_timeout_set(lacp_port, true); +@@ -1439,10 +1442,10 @@ static int lacpdu_send(struct lacp_port *lacp_port) + if (hwaddr_len != ETH_ALEN) + return 0; + +- if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT || lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) ++ if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT) + lacpdu_init(&lacpdu, 0xf1); + else +- lacpdu_init(&lacpdu, 0x01); ++ lacpdu_init(&lacpdu, lacp_port->last_received_lacpdu_version); + lacpdu.actor = lacp_port->actor; + lacpdu.partner = lacp_port->partner; + memcpy(lacpdu.hdr.ether_shost, hwaddr, hwaddr_len); +@@ -1457,9 +1460,12 @@ static int lacpdu_send(struct lacp_port *lacp_port) return err; } @@ -35,26 +64,20 @@ index 4b77692..5a55bd5 100644 if (!lacpdu_check(lacpdu)) { teamd_log_warn("malformed LACP PDU came."); -@@ -1489,18 +1493,34 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1493,17 +1499,38 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) + lacp_port->partner_retry_count, + lacpdu->v2.actor_retry_count); + lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; ++ if (clock_gettime(CLOCK_MONOTONIC, &monotonic_time)) { ++ err = errno; ++ teamd_log_err("%s: unable to get current time: %s", lacp_port->tdport->ifname, strerror(err)); ++ return -err; ++ } ++ } ++ if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { ++ // Reset the change time every time a 0xf1 packet comes in ++ lacp_port->lacp->next_retry_count_change_time = monotonic_time.tv_sec + LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT_SECONDS; } - if (lacp_port->partner_retry_count != lacpdu->v2.actor_retry_count) { - teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", -- lacp_port->tdport->ifname, -- lacp_port->partner_retry_count, -- lacpdu->v2.actor_retry_count); -- lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; -- } -+ lacp_port->tdport->ifname, -+ lacp_port->partner_retry_count, -+ lacpdu->v2.actor_retry_count); -+ lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; -+ if (clock_gettime(CLOCK_MONOTONIC, &monotonic_time)) { -+ err = errno; -+ teamd_log_err("%s: unable to get current time: %s", lacp_port->tdport->ifname, strerror(err)); -+ return -err; -+ } -+ lacp_port->lacp->next_retry_count_change_time = monotonic_time.tv_sec + LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT_SECONDS; -+ } } else { if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { - teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", @@ -80,3 +103,18 @@ index 4b77692..5a55bd5 100644 } } ++ lacp_port->last_received_lacpdu_version = lacpdu->version_number; ++ + err = lacp_port_set_state(lacp_port, PORT_STATE_CURRENT); + if (err) + return err; +@@ -1512,8 +1539,7 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) + + /* Check if the other side has correct info about us */ + if (memcmp(&lacpdu->partner, &lacp_port->actor, sizeof(struct lacpdu_info)) +- || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count) +- || (lacpdu->version_number != 0xf1 && lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT)) { ++ || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count)) { + err = lacpdu_send(lacp_port); + if (err) + return err; From e977386dd67c47c472b5f68596648edb8aa7b961 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Tue, 25 Apr 2023 15:27:33 -0700 Subject: [PATCH 09/12] Send LACPDU immediately on version change After getting a LACPDU that uses a diferent LACPDU version, immediately send a LACPDU back. Signed-off-by: Saikrishna Arcot --- .../0016-block-retry-count-changes.patch | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/libteam/patch/0016-block-retry-count-changes.patch b/src/libteam/patch/0016-block-retry-count-changes.patch index 083ea9b2f86c..96626571b1e5 100644 --- a/src/libteam/patch/0016-block-retry-count-changes.patch +++ b/src/libteam/patch/0016-block-retry-count-changes.patch @@ -7,11 +7,11 @@ After setting the retry count to some custom value, if a normal LACP packet comes in without a custom retry count, don't reset it back to that custom retry count for 60 seconds since the last new packet. --- - teamd/teamd_runner_lacp.c | 44 +++++++++++++++++++++++++++++++++++--------- - 1 file changed, 35 insertions(+), 9 deletions(-) + teamd/teamd_runner_lacp.c | 56 ++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index 5c7e7b4..4f971d9 100644 +index 6731b59..58fa918 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -180,6 +180,7 @@ struct lacp { @@ -30,7 +30,7 @@ index 5c7e7b4..4f971d9 100644 bool lacpdu_saved; struct lacpdu last_pdu; struct { -@@ -1331,6 +1333,7 @@ static int lacp_port_set_state(struct lacp_port *lacp_port, +@@ -1333,6 +1335,7 @@ static int lacp_port_set_state(struct lacp_port *lacp_port, if (err) return err; lacp_port->lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; @@ -38,7 +38,7 @@ index 5c7e7b4..4f971d9 100644 teamd_loop_callback_disable(lacp_port->ctx, LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp_port->lacp); lacp_port_timeout_set(lacp_port, true); -@@ -1439,10 +1442,10 @@ static int lacpdu_send(struct lacp_port *lacp_port) +@@ -1441,10 +1444,10 @@ static int lacpdu_send(struct lacp_port *lacp_port) if (hwaddr_len != ETH_ALEN) return 0; @@ -51,7 +51,7 @@ index 5c7e7b4..4f971d9 100644 lacpdu.actor = lacp_port->actor; lacpdu.partner = lacp_port->partner; memcpy(lacpdu.hdr.ether_shost, hwaddr, hwaddr_len); -@@ -1457,9 +1460,12 @@ static int lacpdu_send(struct lacp_port *lacp_port) +@@ -1459,9 +1462,12 @@ static int lacpdu_send(struct lacp_port *lacp_port) return err; } @@ -64,7 +64,7 @@ index 5c7e7b4..4f971d9 100644 if (!lacpdu_check(lacpdu)) { teamd_log_warn("malformed LACP PDU came."); -@@ -1493,17 +1499,38 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1495,17 +1501,50 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) lacp_port->partner_retry_count, lacpdu->v2.actor_retry_count); lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; @@ -103,12 +103,24 @@ index 5c7e7b4..4f971d9 100644 } } -+ lacp_port->last_received_lacpdu_version = lacpdu->version_number; ++ if (lacp_port->last_received_lacpdu_version != lacpdu->version_number) { ++ teamd_log_dbg(lacp_port->ctx, "%s: LACPDU version changed from %u to %u", ++ lacp_port->tdport->ifname, ++ lacp_port->last_received_lacpdu_version, ++ lacpdu->version_number); ++ lacp_port->last_received_lacpdu_version = lacpdu->version_number; ++ // Force-send a LACPDU packet acknowledging change in version ++ err = lacpdu_send(lacp_port); ++ if (err) ++ return err; ++ } else { ++ lacp_port->last_received_lacpdu_version = lacpdu->version_number; ++ } + err = lacp_port_set_state(lacp_port, PORT_STATE_CURRENT); if (err) return err; -@@ -1512,8 +1539,7 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1514,8 +1553,7 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) /* Check if the other side has correct info about us */ if (memcmp(&lacpdu->partner, &lacp_port->actor, sizeof(struct lacpdu_info)) From 357cd54dccbdf9a0fd559208db703c8b91ff304a Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 4 May 2023 11:40:02 -0700 Subject: [PATCH 10/12] Add option to disable retry count feature, and fix some bugs Signed-off-by: Saikrishna Arcot --- .../0015-add-support-for-custom-retry.patch | 190 ++++++++++++++---- .../0016-block-retry-count-changes.patch | 63 +++--- 2 files changed, 190 insertions(+), 63 deletions(-) diff --git a/src/libteam/patch/0015-add-support-for-custom-retry.patch b/src/libteam/patch/0015-add-support-for-custom-retry.patch index c01114ecc05a..ab69af07ea02 100644 --- a/src/libteam/patch/0015-add-support-for-custom-retry.patch +++ b/src/libteam/patch/0015-add-support-for-custom-retry.patch @@ -6,11 +6,11 @@ Date: 2022-12-21 18:11:31 -0800 Add support for using custom retry count (instead of the default of 3) for LACP sessions, to allow for sessions to stay up for more than 90 seconds. --- - teamd/teamd_runner_lacp.c | 227 ++++++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 213 insertions(+), 14 deletions(-) + teamd/teamd_runner_lacp.c | 338 +++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 324 insertions(+), 14 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index 82b8b86..5c7e7b4 100644 +index 6b43916..d73ee1c 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -77,22 +77,45 @@ struct lacpdu { @@ -101,16 +101,18 @@ index 82b8b86..5c7e7b4 100644 return true; } -@@ -154,6 +194,8 @@ struct lacp { +@@ -154,6 +194,10 @@ struct lacp { #define LACP_CFG_DFLT_MIN_PORTS_MAX 1024 enum lacp_agg_select_policy agg_select_policy; #define LACP_CFG_DFLT_AGG_SELECT_POLICY LACP_AGG_SELECT_LACP_PRIO ++ bool enable_retry_count; ++#define LACP_CFG_DFLT_ENABLE_RETRY_COUNT true + uint8_t retry_count; +#define LACP_CFG_DFLT_RETRY_COUNT 3 } cfg; struct { bool carrier_up; -@@ -185,6 +227,7 @@ struct lacp_port { +@@ -185,6 +229,7 @@ struct lacp_port { struct lacpdu_info actor; struct lacpdu_info partner; struct lacpdu_info __partner_last; /* last state before update */ @@ -118,7 +120,30 @@ index 82b8b86..5c7e7b4 100644 bool periodic_on; struct lacp_port *agg_lead; /* leading port of aggregator. * NULL in case this port is not selected */ -@@ -1110,6 +1153,7 @@ static int slow_addr_del(struct lacp_port *lacp_port) +@@ -513,6 +558,22 @@ static int lacp_load_config(struct teamd_context *ctx, struct lacp *lacp) + } + teamd_log_dbg(ctx, "Using agg_select_policy \"%s\".", + lacp_get_agg_select_policy_name(lacp)); ++ ++ err = teamd_config_bool_get(ctx, &lacp->cfg.enable_retry_count, "$.runner.enable_retry_count"); ++ if (err) ++ lacp->cfg.enable_retry_count = LACP_CFG_DFLT_ENABLE_RETRY_COUNT; ++ teamd_log_dbg(ctx, "Using enable_retry_count \"%d\".", lacp->cfg.enable_retry_count); ++ ++ err = teamd_config_int_get(ctx, &tmp, "$.runner.retry_count"); ++ if (err) { ++ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ } else if (tmp < 3) { ++ teamd_log_err("\"retry_count\" value is out of its limits."); ++ return -EINVAL; ++ } else { ++ lacp->cfg.retry_count = tmp; ++ } ++ teamd_log_dbg(ctx, "Using retry_count \"%d\".", lacp->cfg.retry_count); + return 0; + } + +@@ -1110,6 +1171,7 @@ static int slow_addr_del(struct lacp_port *lacp_port) #define LACP_SOCKET_CB_NAME "lacp_socket" #define LACP_PERIODIC_CB_NAME "lacp_periodic" #define LACP_TIMEOUT_CB_NAME "lacp_timeout" @@ -126,7 +151,7 @@ index 82b8b86..5c7e7b4 100644 static int lacp_port_timeout_set(struct lacp_port *lacp_port, bool fast_forced) { -@@ -1119,7 +1163,7 @@ static int lacp_port_timeout_set(struct lacp_port *lacp_port, bool fast_forced) +@@ -1119,7 +1181,7 @@ static int lacp_port_timeout_set(struct lacp_port *lacp_port, bool fast_forced) ms = fast_forced || lacp_port->lacp->cfg.fast_rate ? LACP_PERIODIC_SHORT: LACP_PERIODIC_LONG; @@ -135,7 +160,7 @@ index 82b8b86..5c7e7b4 100644 ms_to_timespec(&ts, ms); err = teamd_loop_callback_timer_set(lacp_port->ctx, LACP_TIMEOUT_CB_NAME, -@@ -1286,6 +1330,9 @@ static int lacp_port_set_state(struct lacp_port *lacp_port, +@@ -1288,6 +1350,9 @@ static int lacp_port_set_state(struct lacp_port *lacp_port, err = lacp_port_partner_update(lacp_port); if (err) return err; @@ -145,15 +170,21 @@ index 82b8b86..5c7e7b4 100644 lacp_port_timeout_set(lacp_port, true); teamd_loop_callback_enable(lacp_port->ctx, LACP_TIMEOUT_CB_NAME, lacp_port); -@@ -1392,12 +1439,19 @@ static int lacpdu_send(struct lacp_port *lacp_port) +@@ -1394,12 +1459,25 @@ static int lacpdu_send(struct lacp_port *lacp_port) if (hwaddr_len != ETH_ALEN) return 0; - lacpdu_init(&lacpdu); -+ if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT || lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) -+ lacpdu_init(&lacpdu, 0xf1); -+ else ++ if (lacp_port->lacp->cfg.enable_retry_count) { ++ if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT ++ || lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { ++ lacpdu_init(&lacpdu, 0xf1); ++ } else { ++ lacpdu_init(&lacpdu, 0x01); ++ } ++ } else { + lacpdu_init(&lacpdu, 0x01); ++ } lacpdu.actor = lacp_port->actor; lacpdu.partner = lacp_port->partner; memcpy(lacpdu.hdr.ether_shost, hwaddr, hwaddr_len); @@ -166,11 +197,15 @@ index 82b8b86..5c7e7b4 100644 err = teamd_send(lacp_port->sock, &lacpdu, sizeof(lacpdu), 0); return err; -@@ -1428,6 +1482,28 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1430,6 +1508,32 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) return err; } + if (lacpdu->version_number == 0xf1) { ++ if (!lacp_port->lacp->cfg.enable_retry_count) { ++ teamd_log_err("%s: Received 0xf1 LACPDU packet while retry count feature is disabled.", lacp_port->tdport->ifname); ++ return -EINVAL; ++ } + if (lacpdu->v2.actor_retry_count < LACP_CFG_DFLT_RETRY_COUNT) { + teamd_log_err("%s: retry count from partner (%u) out of its limits.", lacp_port->tdport->ifname, lacpdu->v2.actor_retry_count); + return -EINVAL; @@ -195,19 +230,19 @@ index 82b8b86..5c7e7b4 100644 err = lacp_port_set_state(lacp_port, PORT_STATE_CURRENT); if (err) return err; -@@ -1435,8 +1511,9 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1437,8 +1541,9 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) lacp_port_actor_update(lacp_port); /* Check if the other side has correct info about us */ - if (memcmp(&lacpdu->partner, &lacp_port->actor, - sizeof(struct lacpdu_info))) { + if (memcmp(&lacpdu->partner, &lacp_port->actor, sizeof(struct lacpdu_info)) -+ || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count) -+ || (lacpdu->version_number != 0xf1 && lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT)) { ++ || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count) ++ || (lacpdu->version_number != 0xf1 && lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT)) { err = lacpdu_send(lacp_port); if (err) return err; -@@ -1506,6 +1583,19 @@ static int lacp_callback_timeout(struct teamd_context *ctx, int events, +@@ -1508,6 +1613,19 @@ static int lacp_callback_timeout(struct teamd_context *ctx, int events, return err; } @@ -227,7 +262,7 @@ index 82b8b86..5c7e7b4 100644 static int lacp_callback_periodic(struct teamd_context *ctx, int events, void *priv) { -@@ -1595,6 +1685,7 @@ static int lacp_port_added(struct teamd_context *ctx, +@@ -1597,6 +1715,7 @@ static int lacp_port_added(struct teamd_context *ctx, lacp_port->ctx = ctx; lacp_port->tdport = tdport; lacp_port->lacp = lacp; @@ -235,10 +270,80 @@ index 82b8b86..5c7e7b4 100644 err = lacp_port_load_config(ctx, lacp_port); if (err) { -@@ -1947,6 +2038,88 @@ static int lacp_state_select_policy_get(struct teamd_context *ctx, +@@ -1961,6 +2080,165 @@ static int lacp_state_select_policy_get(struct teamd_context *ctx, return 0; } ++static int lacp_state_enable_retry_count_get(struct teamd_context *ctx, ++ struct team_state_gsc *gsc, ++ void *priv) ++{ ++ struct lacp *lacp = priv; ++ ++ gsc->data.bool_val = lacp->cfg.enable_retry_count; ++ return 0; ++} ++ ++struct lacp_state_enable_retry_count_info { ++ struct teamd_workq workq; ++ struct lacp *lacp; ++ bool enable_retry_count; ++}; ++ ++static int lacp_state_enable_retry_count_work(struct teamd_context *ctx, ++ struct teamd_workq *workq) ++{ ++ struct lacp_state_enable_retry_count_info *info; ++ struct lacp *lacp; ++ struct teamd_port *tdport; ++ ++ info = get_container(workq, struct lacp_state_enable_retry_count_info, workq); ++ lacp = info->lacp; ++ if (info->enable_retry_count == lacp->cfg.enable_retry_count) ++ return 0; ++ lacp->cfg.enable_retry_count = info->enable_retry_count; ++ teamd_log_dbg(ctx, "Retry count feature is set to %d", ++ lacp->cfg.enable_retry_count); ++ ++ if (lacp->cfg.enable_retry_count) ++ return 0; ++ ++ // Reset all retry counts to the default value ++ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ teamd_loop_callback_disable(ctx, ++ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp); ++ ++ teamd_for_each_tdport(tdport, lacp->ctx) { ++ struct lacp_port* lacp_port; ++ ++ lacp_port = lacp_port_get(lacp, tdport); ++ lacp_port->partner_retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ if (lacp_port_selected(lacp_port)) { ++ teamd_log_dbg(ctx, "%s: Notifying partner of retry count reset", ++ lacp_port->tdport->ifname); ++ lacpdu_send(lacp_port); ++ } ++ } ++ return 0; ++} ++ ++static int lacp_state_enable_retry_count_set(struct teamd_context *ctx, ++ struct team_state_gsc *gsc, ++ void *priv) ++{ ++ struct lacp_state_enable_retry_count_info *info; ++ struct lacp *lacp = priv; ++ ++ info = malloc(sizeof(*info)); ++ if (!info) ++ return -ENOMEM; ++ teamd_workq_init_work(&info->workq, lacp_state_enable_retry_count_work); ++ info->lacp = lacp; ++ info->enable_retry_count = gsc->data.bool_val; ++ teamd_workq_schedule_work(ctx, &info->workq); ++ return 0; ++} ++ +static int lacp_state_retry_count_get(struct teamd_context *ctx, + struct team_state_gsc *gsc, + void *priv) @@ -273,19 +378,24 @@ index 82b8b86..5c7e7b4 100644 + lacp->cfg.retry_count, + info->retry_count); + lacp->cfg.retry_count = info->retry_count; -+ ms = lacp->cfg.retry_count * 3 * 60 * 1000; -+ ms_to_timespec(&ts, ms); -+ err = teamd_loop_callback_timer_set(lacp->ctx, -+ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, -+ lacp, NULL, &ts); -+ if (err) { -+ teamd_log_err("Failed to set retry count timeout timer."); -+ // Switch back to default now -+ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; -+ return err; ++ if (lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT) { ++ ms = lacp->cfg.retry_count * 3 * 60 * 1000; ++ ms_to_timespec(&ts, ms); ++ err = teamd_loop_callback_timer_set(lacp->ctx, ++ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, ++ lacp, NULL, &ts); ++ if (err) { ++ teamd_log_err("Failed to set retry count timeout timer."); ++ // Switch back to default now ++ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; ++ return err; ++ } ++ teamd_loop_callback_enable(ctx, ++ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp); ++ } else { ++ teamd_loop_callback_disable(ctx, ++ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp); + } -+ teamd_loop_callback_enable(ctx, -+ LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp); + + teamd_for_each_tdport(tdport, lacp->ctx) { + struct lacp_port* lacp_port; @@ -309,6 +419,8 @@ index 82b8b86..5c7e7b4 100644 + + if (!gsc->data.int_val) + return -EOPNOTSUPP; ++ if (!lacp->cfg.enable_retry_count) ++ return -EOPNOTSUPP; + if (gsc->data.int_val < 3 || gsc->data.int_val > UCHAR_MAX) + return -EINVAL; + info = malloc(sizeof(*info)); @@ -324,11 +436,17 @@ index 82b8b86..5c7e7b4 100644 static const struct teamd_state_val lacp_state_vals[] = { { .subpath = "active", -@@ -1973,6 +2146,12 @@ static const struct teamd_state_val lacp_state_vals[] = { +@@ -1987,6 +2265,18 @@ static const struct teamd_state_val lacp_state_vals[] = { .type = TEAMD_STATE_ITEM_TYPE_STRING, .getter = lacp_state_select_policy_get, }, + { ++ .subpath = "enable_retry_count_feature", ++ .type = TEAMD_STATE_ITEM_TYPE_BOOL, ++ .getter = lacp_state_enable_retry_count_get, ++ .setter = lacp_state_enable_retry_count_set, ++ }, ++ { + .subpath = "retry_count", + .type = TEAMD_STATE_ITEM_TYPE_INT, + .getter = lacp_state_retry_count_get, @@ -337,7 +455,7 @@ index 82b8b86..5c7e7b4 100644 }; static struct lacp_port *lacp_port_gsc(struct team_state_gsc *gsc, -@@ -2272,6 +2451,14 @@ static int lacp_port_state_prio_get(struct teamd_context *ctx, +@@ -2286,6 +2576,14 @@ static int lacp_port_state_prio_get(struct teamd_context *ctx, return 0; } @@ -352,7 +470,7 @@ index 82b8b86..5c7e7b4 100644 static const struct teamd_state_val lacp_port_state_vals[] = { { .subpath = "selected", -@@ -2314,6 +2501,11 @@ static const struct teamd_state_val lacp_port_state_vals[] = { +@@ -2328,6 +2626,11 @@ static const struct teamd_state_val lacp_port_state_vals[] = { .vals = lacp_port_partner_state_vals, .vals_count = ARRAY_SIZE(lacp_port_partner_state_vals), }, @@ -364,7 +482,7 @@ index 82b8b86..5c7e7b4 100644 }; static const struct teamd_state_val lacp_state_vgs[] = { -@@ -2380,6 +2572,12 @@ static int lacp_init(struct teamd_context *ctx, void *priv) +@@ -2394,6 +2697,12 @@ static int lacp_init(struct teamd_context *ctx, void *priv) teamd_log_err("Failed to register state groups."); goto balancer_fini; } @@ -377,7 +495,7 @@ index 82b8b86..5c7e7b4 100644 return 0; balancer_fini: -@@ -2395,6 +2593,7 @@ static void lacp_fini(struct teamd_context *ctx, void *priv) +@@ -2409,6 +2718,7 @@ static void lacp_fini(struct teamd_context *ctx, void *priv) if (ctx->lacp_directory) lacp_state_save(ctx, lacp); diff --git a/src/libteam/patch/0016-block-retry-count-changes.patch b/src/libteam/patch/0016-block-retry-count-changes.patch index 96626571b1e5..80fa610e8a0e 100644 --- a/src/libteam/patch/0016-block-retry-count-changes.patch +++ b/src/libteam/patch/0016-block-retry-count-changes.patch @@ -7,11 +7,11 @@ After setting the retry count to some custom value, if a normal LACP packet comes in without a custom retry count, don't reset it back to that custom retry count for 60 seconds since the last new packet. --- - teamd/teamd_runner_lacp.c | 56 ++++++++++++++++++++++++++++++++++++++------- - 1 file changed, 47 insertions(+), 9 deletions(-) + teamd/teamd_runner_lacp.c | 60 +++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index 6731b59..58fa918 100644 +index d73ee1c..3aaf97a 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -180,6 +180,7 @@ struct lacp { @@ -22,7 +22,7 @@ index 6731b59..58fa918 100644 struct { bool active; #define LACP_CFG_DFLT_ACTIVE true -@@ -232,6 +233,7 @@ struct lacp_port { +@@ -234,6 +235,7 @@ struct lacp_port { struct lacp_port *agg_lead; /* leading port of aggregator. * NULL in case this port is not selected */ enum lacp_port_state state; @@ -30,7 +30,7 @@ index 6731b59..58fa918 100644 bool lacpdu_saved; struct lacpdu last_pdu; struct { -@@ -1333,6 +1335,7 @@ static int lacp_port_set_state(struct lacp_port *lacp_port, +@@ -1351,6 +1353,7 @@ static int lacp_port_set_state(struct lacp_port *lacp_port, if (err) return err; lacp_port->lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; @@ -38,24 +38,20 @@ index 6731b59..58fa918 100644 teamd_loop_callback_disable(lacp_port->ctx, LACP_RETRY_COUNT_TIMEOUT_CB_NAME, lacp_port->lacp); lacp_port_timeout_set(lacp_port, true); -@@ -1441,10 +1444,10 @@ static int lacpdu_send(struct lacp_port *lacp_port) - if (hwaddr_len != ETH_ALEN) - return 0; - -- if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT || lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) -+ if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT) - lacpdu_init(&lacpdu, 0xf1); - else -- lacpdu_init(&lacpdu, 0x01); -+ lacpdu_init(&lacpdu, lacp_port->last_received_lacpdu_version); - lacpdu.actor = lacp_port->actor; - lacpdu.partner = lacp_port->partner; - memcpy(lacpdu.hdr.ether_shost, hwaddr, hwaddr_len); -@@ -1459,9 +1462,12 @@ static int lacpdu_send(struct lacp_port *lacp_port) +@@ -1462,7 +1465,7 @@ static int lacpdu_send(struct lacp_port *lacp_port) + if (lacp_port->lacp->cfg.enable_retry_count) { + if (lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT + || lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { +- lacpdu_init(&lacpdu, 0xf1); ++ lacpdu_init(&lacpdu, lacp_port->last_received_lacpdu_version); + } else { + lacpdu_init(&lacpdu, 0x01); + } +@@ -1483,9 +1486,12 @@ static int lacpdu_send(struct lacp_port *lacp_port) return err; } -+#define LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT_SECONDS 60 ++#define LACP_RETRY_COUNT_RESET_TIMEOUT_SECONDS 60 + static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) { @@ -64,7 +60,7 @@ index 6731b59..58fa918 100644 if (!lacpdu_check(lacpdu)) { teamd_log_warn("malformed LACP PDU came."); -@@ -1495,17 +1501,50 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1523,17 +1529,55 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) lacp_port->partner_retry_count, lacpdu->v2.actor_retry_count); lacp_port->partner_retry_count = lacpdu->v2.actor_retry_count; @@ -76,7 +72,12 @@ index 6731b59..58fa918 100644 + } + if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { + // Reset the change time every time a 0xf1 packet comes in -+ lacp_port->lacp->next_retry_count_change_time = monotonic_time.tv_sec + LACP_NEXT_RETRY_COUNT_CHANGE_TIMEOUT_SECONDS; ++ if (clock_gettime(CLOCK_MONOTONIC, &monotonic_time)) { ++ err = errno; ++ teamd_log_err("%s: unable to get current time: %s", lacp_port->tdport->ifname, strerror(err)); ++ return -err; ++ } ++ lacp_port->lacp->next_retry_count_change_time = monotonic_time.tv_sec + LACP_RETRY_COUNT_RESET_TIMEOUT_SECONDS; } } else { if (lacp_port->partner_retry_count != LACP_CFG_DFLT_RETRY_COUNT) { @@ -91,7 +92,7 @@ index 6731b59..58fa918 100644 + return -err; + } + if (monotonic_time.tv_sec < lacp_port->lacp->next_retry_count_change_time) { -+ teamd_log_dbg(lacp_port->ctx, "%s: retry count changed too recently", ++ teamd_log_dbg(lacp_port->ctx, "%s: ignoring resetting retry count to 3", + lacp_port->tdport->ifname); + } else { + teamd_log_dbg(lacp_port->ctx, "%s: retry count from partner changed from %u to %u", @@ -120,13 +121,21 @@ index 6731b59..58fa918 100644 err = lacp_port_set_state(lacp_port, PORT_STATE_CURRENT); if (err) return err; -@@ -1514,8 +1553,7 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) +@@ -1542,8 +1586,7 @@ static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu) /* Check if the other side has correct info about us */ if (memcmp(&lacpdu->partner, &lacp_port->actor, sizeof(struct lacpdu_info)) -- || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count) -- || (lacpdu->version_number != 0xf1 && lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT)) { -+ || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count)) { +- || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count) +- || (lacpdu->version_number != 0xf1 && lacp_port->lacp->cfg.retry_count != LACP_CFG_DFLT_RETRY_COUNT)) { ++ || (lacpdu->version_number == 0xf1 && lacp_port->lacp->cfg.retry_count != lacpdu->v2.partner_retry_count)) { err = lacpdu_send(lacp_port); if (err) return err; +@@ -2210,6 +2253,7 @@ static int lacp_state_retry_count_work(struct teamd_context *ctx, + if (lacp_port_selected(lacp_port)) { + teamd_log_dbg(ctx, "%s: Notifying partner of updated retry count", + lacp_port->tdport->ifname); ++ lacp_port->last_received_lacpdu_version = 0xf1; + lacpdu_send(lacp_port); + } + } From f80bbfd5f0730a288d5b8acedefde29785f40e4b Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Fri, 12 May 2023 13:45:51 -0700 Subject: [PATCH 11/12] Use LACP_CFG_DFLT_RETRY_COUNT if an invalid retry count is set via config Signed-off-by: Saikrishna Arcot --- src/libteam/patch/0015-add-support-for-custom-retry.patch | 6 +++--- src/libteam/patch/0016-block-retry-count-changes.patch | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libteam/patch/0015-add-support-for-custom-retry.patch b/src/libteam/patch/0015-add-support-for-custom-retry.patch index ab69af07ea02..f8bee7dec7a8 100644 --- a/src/libteam/patch/0015-add-support-for-custom-retry.patch +++ b/src/libteam/patch/0015-add-support-for-custom-retry.patch @@ -10,7 +10,7 @@ sessions, to allow for sessions to stay up for more than 90 seconds. 1 file changed, 324 insertions(+), 14 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index 6b43916..d73ee1c 100644 +index 6b43916..3e8a0f6 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -77,22 +77,45 @@ struct lacpdu { @@ -134,8 +134,8 @@ index 6b43916..d73ee1c 100644 + if (err) { + lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; + } else if (tmp < 3) { -+ teamd_log_err("\"retry_count\" value is out of its limits."); -+ return -EINVAL; ++ teamd_log_err("\"retry_count\" value is out of its limits, using LACP standard default (3) instead"); ++ lacp->cfg.retry_count = LACP_CFG_DFLT_RETRY_COUNT; + } else { + lacp->cfg.retry_count = tmp; + } diff --git a/src/libteam/patch/0016-block-retry-count-changes.patch b/src/libteam/patch/0016-block-retry-count-changes.patch index 80fa610e8a0e..5ffca9456b47 100644 --- a/src/libteam/patch/0016-block-retry-count-changes.patch +++ b/src/libteam/patch/0016-block-retry-count-changes.patch @@ -11,7 +11,7 @@ that custom retry count for 60 seconds since the last new packet. 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c -index d73ee1c..3aaf97a 100644 +index 3e8a0f6..c5dad35 100644 --- a/teamd/teamd_runner_lacp.c +++ b/teamd/teamd_runner_lacp.c @@ -180,6 +180,7 @@ struct lacp { From 67c1d527d3fb03d3e7daa2a77224751a3ef31bed Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Tue, 30 May 2023 15:45:31 -0700 Subject: [PATCH 12/12] Address review comments Signed-off-by: Saikrishna Arcot --- src/libteam/patch/0016-block-retry-count-changes.patch | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libteam/patch/0016-block-retry-count-changes.patch b/src/libteam/patch/0016-block-retry-count-changes.patch index 5ffca9456b47..306afd771c53 100644 --- a/src/libteam/patch/0016-block-retry-count-changes.patch +++ b/src/libteam/patch/0016-block-retry-count-changes.patch @@ -5,7 +5,7 @@ Date: 2023-01-18 14:26:36 -0800 After setting the retry count to some custom value, if a normal LACP packet comes in without a custom retry count, don't reset it back to -that custom retry count for 60 seconds since the last new packet. +the default retry count for 60 seconds since the last new packet. --- teamd/teamd_runner_lacp.c | 60 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 8 deletions(-) @@ -105,10 +105,10 @@ index 3e8a0f6..c5dad35 100644 } + if (lacp_port->last_received_lacpdu_version != lacpdu->version_number) { -+ teamd_log_dbg(lacp_port->ctx, "%s: LACPDU version changed from %u to %u", -+ lacp_port->tdport->ifname, -+ lacp_port->last_received_lacpdu_version, -+ lacpdu->version_number); ++ teamd_log_dbg(lacp_port->ctx, "%s: LACPDU version changed from %u to %u", ++ lacp_port->tdport->ifname, ++ lacp_port->last_received_lacpdu_version, ++ lacpdu->version_number); + lacp_port->last_received_lacpdu_version = lacpdu->version_number; + // Force-send a LACPDU packet acknowledging change in version + err = lacpdu_send(lacp_port);