From 8eb15d25b7fb1f6778a6fc364e1d9d7fd47f979d Mon Sep 17 00:00:00 2001 From: samugi Date: Fri, 21 Jun 2024 21:33:47 +0200 Subject: [PATCH 1/2] fix(tracing): ensure sampling decisions apply to correct trace ID Kong generates a temporary trace ID when spans are created, this trace ID can be overwritten if tracing headers are passed in the request. Before this change, the global sampling rate from kong.conf was applied to the temporary trace ID, which was only valid for traces created by Kong, and not in distributed tracing scenarios. This commit ensures the sampling decision is only taken once, after headers have been read, when the trace id is final. This ensures Kong's sampling decision matches that of other tracing systems and therefore guarantees that traces are not broken when sampling rates are set to the same values. --- .../kong/fix-tracing-sampling-rate.yml | 3 + kong/pdk/tracing.lua | 10 +-- .../37-opentelemetry/04-exporter_spec.lua | 74 +++++++++++++++++++ 3 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 changelog/unreleased/kong/fix-tracing-sampling-rate.yml diff --git a/changelog/unreleased/kong/fix-tracing-sampling-rate.yml b/changelog/unreleased/kong/fix-tracing-sampling-rate.yml new file mode 100644 index 000000000000..41c4bd2bc02a --- /dev/null +++ b/changelog/unreleased/kong/fix-tracing-sampling-rate.yml @@ -0,0 +1,3 @@ +message: "**OpenTelemetry:** improved accuracy of sampling decisions." +type: bugfix +scope: Plugin diff --git a/kong/pdk/tracing.lua b/kong/pdk/tracing.lua index 9d653989f447..a1ab6533e6e9 100644 --- a/kong/pdk/tracing.lua +++ b/kong/pdk/tracing.lua @@ -575,12 +575,13 @@ local function new_tracer(name, options) -- @tparam number sampling_rate the sampling rate to apply for the -- probability sampler -- @treturn bool sampled value of sampled for this trace - function self:get_sampling_decision(parent_should_sample, sampling_rate) + function self:get_sampling_decision(parent_should_sample, plugin_sampling_rate) local ctx = ngx.ctx local sampled local root_span = ctx.KONG_SPANS and ctx.KONG_SPANS[1] local trace_id = tracing_context.get_raw_trace_id(ctx) + local sampling_rate = plugin_sampling_rate or kong.configuration.tracing_sampling_rate if not root_span or root_span.attributes["kong.propagation_only"] then -- should not sample if there is no root span or if the root span is @@ -592,12 +593,7 @@ local function new_tracer(name, options) -- and Kong is configured to only do headers propagation sampled = parent_should_sample - elseif not sampling_rate then - -- no custom sampling_rate was passed: - -- reuse the sampling result of the root_span - sampled = root_span.should_sample == true - - else + elseif sampling_rate then -- use probability-based sampler local err sampled, err = self.sampler(trace_id, sampling_rate) diff --git a/spec/03-plugins/37-opentelemetry/04-exporter_spec.lua b/spec/03-plugins/37-opentelemetry/04-exporter_spec.lua index 839ec5f70312..aa65233e123a 100644 --- a/spec/03-plugins/37-opentelemetry/04-exporter_spec.lua +++ b/spec/03-plugins/37-opentelemetry/04-exporter_spec.lua @@ -243,6 +243,80 @@ for _, strategy in helpers.each_strategy() do end) end + + describe("With config.sampling_rate unset, using global sampling rate: 0.5", function () + local mock + local sampling_rate = 0.5 + -- this trace_id is always sampled with 0.5 rate + local sampled_trace_id = "92a54b3e1a7c4f2da9e44b8a6f3e1dab" + -- this trace_id is never sampled with 0.5 rate + local non_sampled_trace_id = "4bf92f3577b34da6a3ce929d0e0e4736" + + lazy_setup(function() + bp, _ = assert(helpers.get_db_utils(strategy, { + "services", + "routes", + "plugins", + }, { "opentelemetry" })) + + setup_instrumentations("all", {}, nil, nil, nil, nil, sampling_rate) + mock = helpers.http_mock(HTTP_SERVER_PORT, { timeout = HTTP_MOCK_TIMEOUT }) + end) + + lazy_teardown(function() + helpers.stop_kong() + if mock then + mock("close", true) + end + end) + + it("does not sample spans when trace_id == non_sampled_trace_id", function() + local cli = helpers.proxy_client(7000, PROXY_PORT) + local r = assert(cli:send { + method = "GET", + path = "/", + headers = { + traceparent = "00-" .. non_sampled_trace_id .. "-0123456789abcdef-01" + } + }) + assert.res_status(200, r) + + cli:close() + + ngx.sleep(2) + local lines = mock() + assert.is_falsy(lines) + end) + + it("samples spans when trace_id == sampled_trace_id", function () + for _ = 1, 10 do + local body + helpers.wait_until(function() + local cli = helpers.proxy_client(7000, PROXY_PORT) + local r = assert(cli:send { + method = "GET", + path = "/", + headers = { + traceparent = "00-" .. sampled_trace_id .. "-0123456789abcdef-01" + } + }) + assert.res_status(200, r) + + cli:close() + + local lines + lines, body = mock() + return lines + end, 10) + + local decoded = assert(pb.decode("opentelemetry.proto.collector.trace.v1.ExportTraceServiceRequest", body)) + assert.not_nil(decoded) + local scope_spans = decoded.resource_spans[1].scope_spans + assert.is_true(#scope_spans > 0, scope_spans) + end + end) + end) + for _, case in ipairs{ {true, true, true}, {true, true, nil}, From f7366dc70702b017099c8d888c27bbe13b684121 Mon Sep 17 00:00:00 2001 From: Samuele Date: Wed, 26 Jun 2024 10:10:56 +0200 Subject: [PATCH 2/2] Update changelog/unreleased/kong/fix-tracing-sampling-rate.yml Co-authored-by: Qi --- changelog/unreleased/kong/fix-tracing-sampling-rate.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog/unreleased/kong/fix-tracing-sampling-rate.yml b/changelog/unreleased/kong/fix-tracing-sampling-rate.yml index 41c4bd2bc02a..6b27e3c90563 100644 --- a/changelog/unreleased/kong/fix-tracing-sampling-rate.yml +++ b/changelog/unreleased/kong/fix-tracing-sampling-rate.yml @@ -1,3 +1,3 @@ -message: "**OpenTelemetry:** improved accuracy of sampling decisions." +message: "**OpenTelemetry:** Improved accuracy of sampling decisions." type: bugfix scope: Plugin