Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up ProbabilitySampler for 64 bit trace IDs #238

Merged
merged 11 commits into from
Feb 13, 2020
10 changes: 5 additions & 5 deletions opentelemetry-api/src/opentelemetry/trace/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,13 @@ def __init__(self, rate: float):
self._rate = rate
self._bound = self.get_bound_for_rate(self._rate)

# The sampler checks the last 8 bytes of the trace ID to decide whether to
# sample a given trace.
CHECK_BYTES = 0xFFFFFFFFFFFFFFFF
# For compatibility with 64 bit trace IDs, the sampler checks the 64
# low-order bits of the trace ID to decide whether to sample a given trace.
TRACE_ID_LIMIT = (1 << 64) - 1

@classmethod
def get_bound_for_rate(cls, rate: float) -> int:
return round(rate * (cls.CHECK_BYTES + 1))
return round(rate * (cls.TRACE_ID_LIMIT + 1))

@property
def rate(self) -> float:
Expand All @@ -115,7 +115,7 @@ def should_sample(
if parent_context is not None:
return Decision(parent_context.trace_options.sampled)

return Decision(trace_id & self.CHECK_BYTES < self.bound)
return Decision(trace_id & self.TRACE_ID_LIMIT < self.bound)


# Samplers that ignore the parent sampling decision and never/always sample.
Expand Down
37 changes: 27 additions & 10 deletions opentelemetry-api/tests/trace/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import unittest

from opentelemetry import trace
Expand Down Expand Up @@ -137,7 +138,7 @@ def test_probability_sampler(self):
trace.SpanContext(
0xDEADBEF0, 0xDEADBEF1, trace_options=TO_DEFAULT
),
0x8000000000000000,
0x7FFFFFFFFFFFFFFF,
c24t marked this conversation as resolved.
Show resolved Hide resolved
0xDEADBEEF,
"span name",
).sampled
Expand All @@ -147,7 +148,7 @@ def test_probability_sampler(self):
trace.SpanContext(
0xDEADBEF0, 0xDEADBEF1, trace_options=TO_SAMPLED
),
0x8000000000000001,
0x8000000000000000,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would 0x0 make it more clear?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same reason as above, but not a strong preference in either case.

0xDEADBEEF,
"span name",
).sampled
Expand Down Expand Up @@ -189,14 +190,13 @@ def test_probability_sampler_limits(self):
sampling.ProbabilitySampler.get_bound_for_rate(2 ** -64), 0x1
)

# Sample every trace with (last 8 bytes of) trace ID less than
# 0xffffffffffffffff. In principle this is the highest possible
# sampling rate less than 1, but we can't actually express this rate as
# a float!
# Sample every trace with trace ID less than 0xffffffffffffffff. In
# principle this is the highest possible sampling rate less than 1, but
# we can't actually express this rate as a float!
#
# In practice, the highest possible sampling rate is:
#
# round(sys.float_info.epsilon * 2 ** 64)
# 1 - sys.float_info.epsilon

almost_always_on = sampling.ProbabilitySampler(1 - 2 ** -64)
self.assertTrue(
Expand All @@ -212,12 +212,29 @@ def test_probability_sampler_limits(self):
# self.assertFalse(
# almost_always_on.should_sample(
# None,
# 0xffffffffffffffff,
# 0xdeadbeef,
# 0xFFFFFFFFFFFFFFFF,
# 0xDEADBEEF,
# "span name",
# ).sampled
# )
# self.assertEqual(
# sampling.ProbabilitySampler.get_bound_for_rate(1 - 2 ** -64)),
# 0xffffffffffffffff,
# 0xFFFFFFFFFFFFFFFF,
# )

# Check that a sampler with the highest effective sampling rate < 1
# refuses to sample traces with trace ID 0xffffffffffffffff.
almost_almost_always_on = sampling.ProbabilitySampler(
1 - sys.float_info.epsilon
)
self.assertFalse(
almost_almost_always_on.should_sample(
None, 0xFFFFFFFFFFFFFFFF, 0xDEADBEEF, "span name"
).sampled
)
# Check that the higest effective sampling rate is actually lower than
# the highest theoretical sampling rate. If this test fails the test
# above is wrong.
self.assertLess(
almost_almost_always_on.bound, 0xFFFFFFFFFFFFFFFF,
)