[PROF-10201] Disable profiler allocation counting feature by default
**What does this PR do?**

This PR introduces a new setting for the profiler:
`profiling.advanced.allocation_counting_enabled`, which controls the
profiler allocation counting feature.

This setting is off by default, which slightly reduces allocation
profiling overhead.
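For anyone who still needs the `Datadog::Profiling.allocation_count` API, the feature can be turned back on. Here's a minimal sketch of that configuration (assuming the 2.x `profiling.allocation_enabled` setting for allocation profiling, which this feature requires):

```ruby
Datadog.configure do |c|
  c.profiling.enabled = true
  # Allocation counting only reports values when allocation profiling is enabled
  c.profiling.allocation_enabled = true
  # New setting introduced in this PR; defaults to false
  c.profiling.advanced.allocation_counting_enabled = true
end
```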

**Motivation:**

We actually had this setting back in the ddtrace 1.x series: we
introduced it in #2635 and removed it in #3281 by tying the feature
directly to allocation profiling.

I decided to re-introduce it so we can disable this feature by
default.

**Additional Notes:**

This PR sits atop #3797 because it makes sense to measure both overhead
improvements together, but is otherwise completely independent.

**How to test the change?**

Here are the results from running the `profiler_allocation` benchmark:

```
ruby 2.7.7p221 (2022-11-24 revision 168ec2b1e5) [x86_64-linux]
Warming up --------------------------------------
Allocations (baseline)   1.419M i/100ms
Calculating -------------------------------------
Allocations (baseline)   14.535M (± 2.1%) i/s -    146.197M in  10.062717s

Warming up --------------------------------------
Allocations (alloc_profiling_enabled) 1.122M i/100ms
Calculating -------------------------------------
Allocations (alloc_profiling_enabled) 11.636M (± 2.2%) i/s -    116.679M in  10.032209s

Warming up --------------------------------------
Allocations (alloc_profiling_disabled) 1.124M i/100ms
Calculating -------------------------------------
Allocations (alloc_profiling_disabled) 11.866M (± 2.6%) i/s -    119.175M in  10.050580s

Comparison:
Allocations (baseline): 14534979.3 i/s
Allocations (alloc_profiling_disabled): 11865926.7 i/s - 1.22x  slower
Allocations (alloc_profiling_enabled): 11635919.9 i/s - 1.25x  slower
```

The difference is close to the margin of error; nevertheless, this
feature was showing up in the native profiler, and since it sits on the
hot path for allocation profiling, I think the change is worth it.
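For reference, this benchmark boils down to measuring raw allocation throughput with the profiler off vs. on. An illustrative sketch of that kind of measurement (not the actual `profiler_allocation` benchmark from the repository):

```ruby
require "benchmark/ips"

# Run this once with the profiler disabled and once with it enabled
# (e.g. DD_PROFILING_ENABLED=true DD_PROFILING_ALLOCATION_ENABLED=true),
# then compare the reported iterations per second.
Benchmark.ips do |x|
  x.config(time: 10, warmup: 2)
  x.report("Allocations") { Object.new }
end
```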
ivoanjo committed Jul 22, 2024
1 parent 446f2ad commit 94f14f0
Showing 9 changed files with 68 additions and 10 deletions.
@@ -96,6 +96,7 @@ struct cpu_and_wall_time_worker_state {
bool no_signals_workaround_enabled;
bool dynamic_sampling_rate_enabled;
bool allocation_profiling_enabled;
bool allocation_counting_enabled;
bool skip_idle_samples_for_testing;
VALUE self_instance;
VALUE thread_context_collector_instance;
@@ -181,6 +182,7 @@ static VALUE _native_initialize(
VALUE dynamic_sampling_rate_enabled,
VALUE dynamic_sampling_rate_overhead_target_percentage,
VALUE allocation_profiling_enabled,
VALUE allocation_counting_enabled,
VALUE skip_idle_samples_for_testing
);
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
@@ -278,7 +280,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
@@ -325,6 +327,7 @@ static VALUE _native_new(VALUE klass) {
state->no_signals_workaround_enabled = false;
state->dynamic_sampling_rate_enabled = true;
state->allocation_profiling_enabled = false;
state->allocation_counting_enabled = false;
state->skip_idle_samples_for_testing = false;
state->thread_context_collector_instance = Qnil;
state->idle_sampling_helper_instance = Qnil;
@@ -366,13 +369,15 @@ static VALUE _native_initialize(
VALUE dynamic_sampling_rate_enabled,
VALUE dynamic_sampling_rate_overhead_target_percentage,
VALUE allocation_profiling_enabled,
VALUE allocation_counting_enabled,
VALUE skip_idle_samples_for_testing
) {
ENFORCE_BOOLEAN(gc_profiling_enabled);
ENFORCE_BOOLEAN(no_signals_workaround_enabled);
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
ENFORCE_BOOLEAN(allocation_profiling_enabled);
ENFORCE_BOOLEAN(allocation_counting_enabled);
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)

struct cpu_and_wall_time_worker_state *state;
@@ -382,6 +387,7 @@ static VALUE _native_initialize(
state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);

double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
@@ -1040,7 +1046,9 @@ static void sleep_for(uint64_t time_ns) {
}

static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state;

bool are_allocations_being_tracked = state != NULL && state->allocation_profiling_enabled && state->allocation_counting_enabled;

return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
}
@@ -1067,19 +1075,21 @@ static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
// On big applications, path 1. is the hottest, since we don't sample every object. So it's quite important for it to
// be as fast as possible.
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
// Update thread-local allocation count
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
allocation_count = 0;
} else {
allocation_count++;
}

struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above

// This should not happen in a normal situation because the tracepoint is always enabled after the instance is set
// and disabled before it is cleared, but just in case...
if (state == NULL) return;

if (RB_UNLIKELY(state->allocation_counting_enabled)) {
// Update thread-local allocation count
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
allocation_count = 0;
} else {
allocation_count++;
}
}

// In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
// sample, so we just return early
if (state->during_sample) {
10 changes: 10 additions & 0 deletions lib/datadog/core/configuration/settings.rb
@@ -301,6 +301,16 @@ def initialize(*_)
o.default true
end

# Can be used to enable/disable the Datadog::Profiling.allocation_count feature.
#
# Requires allocation profiling to be enabled.
#
# @default false
option :allocation_counting_enabled do |o|
o.type :bool
o.default false
end

# Can be used to enable/disable the collection of heap profiles.
#
# This feature is alpha and disabled by default
1 change: 1 addition & 0 deletions lib/datadog/profiling.rb
@@ -47,6 +47,7 @@ def self.start_if_enabled
# (This is similar to some OS-based time representations.)
#
# Note 2: All fibers in the same thread will share the same counter values.
# Note 3: This counter is not accurate when using the M:N scheduler.
#
# Only available when the profiler is running, and allocation-related features are not disabled via configuration.
#
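Since the counter is exposed through `Datadog::Profiling.allocation_count`, here's a hedged usage sketch (it assumes the profiler is running with both allocation profiling and the new allocation counting setting enabled; the method returns nil otherwise):

```ruby
before = Datadog::Profiling.allocation_count
10_000.times { Object.new }
after = Datadog::Profiling.allocation_count

puts "Objects allocated on this thread: #{after - before}" if before && after
```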
2 changes: 2 additions & 0 deletions lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb
@@ -21,6 +21,7 @@ def initialize(
thread_context_collector:,
dynamic_sampling_rate_overhead_target_percentage:,
allocation_profiling_enabled:,
allocation_counting_enabled:,
# **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the
# profiler overhead!
dynamic_sampling_rate_enabled: true,
@@ -42,6 +43,7 @@
dynamic_sampling_rate_enabled,
dynamic_sampling_rate_overhead_target_percentage,
allocation_profiling_enabled,
allocation_counting_enabled,
skip_idle_samples_for_testing,
)
@worker_thread = nil
1 change: 1 addition & 0 deletions lib/datadog/profiling/component.rb
@@ -61,6 +61,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
thread_context_collector: thread_context_collector,
dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage,
allocation_profiling_enabled: allocation_profiling_enabled,
allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled,
)

internal_metadata = {
2 changes: 2 additions & 0 deletions sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs
@@ -15,6 +15,7 @@ module Datadog
?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper,
?dynamic_sampling_rate_enabled: bool,
allocation_profiling_enabled: bool,
allocation_counting_enabled: bool,
?skip_idle_samples_for_testing: false,
) -> void

@@ -27,6 +28,7 @@ module Datadog
bool dynamic_sampling_rate_enabled,
Float dynamic_sampling_rate_overhead_target_percentage,
bool allocation_profiling_enabled,
bool allocation_counting_enabled,
bool skip_idle_samples_for_testing,
) -> true

15 changes: 15 additions & 0 deletions spec/datadog/core/configuration/settings_spec.rb
@@ -526,6 +526,21 @@
end
end

describe '#allocation_counting_enabled' do
subject(:allocation_counting_enabled) { settings.profiling.advanced.allocation_counting_enabled }

it { is_expected.to be false }
end

describe '#allocation_counting_enabled=' do
it 'updates the #allocation_counting_enabled setting' do
expect { settings.profiling.advanced.allocation_counting_enabled = true }
.to change { settings.profiling.advanced.allocation_counting_enabled }
.from(false)
.to(true)
end
end

describe '#experimental_heap_enabled' do
subject(:experimental_heap_enabled) { settings.profiling.advanced.experimental_heap_enabled }

@@ -15,13 +15,15 @@
let(:no_signals_workaround_enabled) { false }
let(:timeline_enabled) { false }
let(:options) { {} }
let(:allocation_counting_enabled) { false }
let(:worker_settings) do
{
gc_profiling_enabled: gc_profiling_enabled,
no_signals_workaround_enabled: no_signals_workaround_enabled,
thread_context_collector: build_thread_context_collector(recorder),
dynamic_sampling_rate_overhead_target_percentage: 2.0,
allocation_profiling_enabled: allocation_profiling_enabled,
allocation_counting_enabled: allocation_counting_enabled,
**options
}
end
@@ -961,8 +963,9 @@
cpu_and_wall_time_worker.stop
end

context 'when allocation profiling is enabled' do
context 'when allocation profiling and allocation counting is enabled' do
let(:allocation_profiling_enabled) { true }
let(:allocation_counting_enabled) { true }

it 'always returns a >= 0 value' do
expect(described_class._native_allocation_count).to be >= 0
@@ -1040,10 +1043,21 @@
expect(after_t1 - before_t1).to be 100
expect(after_allocations - before_allocations).to be < 10
end

context 'when allocation profiling is enabled but allocation counting is disabled' do
let(:allocation_counting_enabled) { false }

it 'always returns a nil value' do
100.times { Object.new }

expect(described_class._native_allocation_count).to be nil
end
end
end

context 'when allocation profiling is disabled' do
let(:allocation_profiling_enabled) { false }

it 'always returns a nil value' do
100.times { Object.new }

3 changes: 3 additions & 0 deletions spec/datadog/profiling/component_spec.rb
@@ -84,13 +84,16 @@
.and_return(:overhead_target_percentage_config)
expect(described_class).to receive(:valid_overhead_target)
.with(:overhead_target_percentage_config).and_return(:overhead_target_percentage_config)
expect(settings.profiling.advanced)
.to receive(:allocation_counting_enabled).and_return(:allocation_counting_enabled_config)

expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with(
gc_profiling_enabled: anything,
no_signals_workaround_enabled: :no_signals_result,
thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext),
dynamic_sampling_rate_overhead_target_percentage: :overhead_target_percentage_config,
allocation_profiling_enabled: false,
allocation_counting_enabled: :allocation_counting_enabled_config,
)

build_profiler_component
