Skip to content

Commit

Permalink
[PROF-8667] More comment addressing
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexJF committed Nov 30, 2023
1 parent e61e72e commit 0cb2a47
Show file tree
Hide file tree
Showing 13 changed files with 182 additions and 115 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -936,7 +936,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)

// TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before
// beta), and having something here allows us to test the rest of the feature, sampling decision aside.
if (((allocation_count % state->allocation_sample_every) == 0)) {
if (allocation_count % state->allocation_sample_every == 0) {
// Rescue against any exceptions that happen during sampling
safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
}
Expand Down
42 changes: 36 additions & 6 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ struct heap_recorder {
// ==========================
// Heap Recorder External API
//
// WARN: All these APIs should support receiving a NULL heap_recorder, resulting in a noop.
//
// WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs
// happens under the GVL.
//
Expand All @@ -38,10 +40,26 @@ heap_recorder* heap_recorder_new(void) {
}

void heap_recorder_free(struct heap_recorder* recorder) {
if (recorder == NULL) {
return;
}

ruby_xfree(recorder);
}

void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj, unsigned int weight) {
// TODO: Remove when things get implemented
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"

// Do any cleanup needed on the heap recorder after a fork. Currently a stub.
//
// NOTE(review): tracked live objects from the parent are presumably still alive in the child
// after a fork, so a full reset of tracked objects is likely NOT wanted here — confirm the
// intended semantics when implementing.
void heap_recorder_after_fork(heap_recorder *heap_recorder) {
  // TODO: Implement
}

void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) {
if (heap_recorder == NULL) {
return;
}

heap_recorder->active_recording = (partial_heap_recording) {
.obj = new_obj,
.object_data = (live_object_data) {
Expand All @@ -50,11 +68,11 @@ void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj
};
}

// TODO: Remove when things get implemented
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"

void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
if (heap_recorder == NULL) {
return;
}

partial_heap_recording *active_recording = &heap_recorder->active_recording;

VALUE new_obj = active_recording->obj;
Expand All @@ -72,18 +90,30 @@ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_pro

// Record that `obj` was freed. Currently a stub.
//
// WARN: This can get called during Ruby GC. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED.
void record_heap_free(heap_recorder *heap_recorder, VALUE obj) {
  // A NULL heap recorder means heap profiling is disabled; nothing to do.
  if (heap_recorder != NULL) {
    // TODO: Implement
  }
}

// Flush any pending data in the heap recorder. Currently a stub.
void heap_recorder_flush(heap_recorder *heap_recorder) {
  // A NULL heap recorder means heap profiling is disabled; nothing to flush.
  if (heap_recorder != NULL) {
    // TODO: Implement
  }
}

void heap_recorder_for_each_live_object(
heap_recorder *heap_recorder,
void (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg),
bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg),
void *for_each_callback_extra_arg,
bool with_gvl) {
if (heap_recorder == NULL) {
return;
}

// TODO: Implement
}

Expand Down
11 changes: 9 additions & 2 deletions ext/ddtrace_profiling_native_extension/heap_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
// not yet observed are deemed as alive and can be iterated on to produce a
// live heap profile.
//
// NOTE: All public APIs of heap_recorder support receiving a NULL heap_recorder
// in which case the behaviour will be a noop.
//
// WARN: Unless otherwise stated the heap recorder APIs assume calls are done
// under the GVL.
typedef struct heap_recorder heap_recorder;
Expand Down Expand Up @@ -38,6 +41,9 @@ heap_recorder* heap_recorder_new(void);
// Free a previously initialized heap recorder.
void heap_recorder_free(heap_recorder *heap_recorder);

// Do any cleanup needed after forking.
void heap_recorder_after_fork(heap_recorder *heap_recorder);

// Start a heap allocation recording on the heap recorder for a new object.
//
// This heap allocation recording needs to be ended via ::end_heap_allocation_recording
Expand Down Expand Up @@ -88,14 +94,15 @@ void heap_recorder_flush(heap_recorder *heap_recorder);
// A callback function that shall be called for each live object being tracked
// by the heap recorder. Alongside the iteration_data for each live object,
// a second argument will be forwarded with the contents of the optional
// for_each_callback_extra_arg.
// for_each_callback_extra_arg. Iteration will continue until the callback
// returns false or we run out of objects.
// @param for_each_callback_extra_arg
// Optional (NULL if empty) extra data that should be passed to the
// callback function alongside the data for each live tracked object.
// @param with_gvl
// True if we're calling this while holding the GVL, false otherwise.
void heap_recorder_for_each_live_object(
heap_recorder *heap_recorder,
void (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg),
bool (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg),
void *for_each_callback_extra_arg,
bool with_gvl);
33 changes: 13 additions & 20 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,29 +41,22 @@ ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) {
}
}

#define MAX_DDOG_ERR_MESSAGE_SIZE 128
#define DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX "..."
size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity) {
if (capacity == 0 || string == NULL) {
// short-circuit, we can't write anything
ddog_Error_drop(error);
return 0;
}

void grab_gvl_and_raise_ddogerr_and_drop(const char *while_context, ddog_Error *error) {
char error_msg[MAX_DDOG_ERR_MESSAGE_SIZE];
ddog_CharSlice error_msg_slice = ddog_Error_message(error);
size_t copy_size = error_msg_slice.len;
size_t error_msg_size = error_msg_slice.len;
// Account for extra null char for proper cstring
size_t error_msg_size = copy_size + 1;
bool truncated = false;
if (error_msg_size > MAX_DDOG_ERR_MESSAGE_SIZE) {
// if it seems like the error msg won't fit, lets truncate things by limiting
// the copy_size to MAX_SIZE minus the size of the truncation suffix
// (which already includes space for a terminating '\0' due to how sizeof works)
copy_size = MAX_DDOG_ERR_MESSAGE_SIZE - sizeof(DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX);
error_msg_size = MAX_DDOG_ERR_MESSAGE_SIZE;
truncated = true;
}
strncpy(error_msg, error_msg_slice.ptr, copy_size);
if (truncated) {
strcpy(error_msg + copy_size, DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX);
if (error_msg_size >= capacity) {
// Error message too big, lets truncate it to capacity - 1 to allow for extra null at end
error_msg_size = capacity - 1;
}
error_msg[error_msg_size - 1] = '\0';
strncpy(string, error_msg_slice.ptr, error_msg_size);
string[error_msg_size] = '\0';
ddog_Error_drop(error);
grab_gvl_and_raise(rb_eRuntimeError, "Libstreaming error while (%s): %s", while_context, error_msg);
return error_msg_size;
}
9 changes: 4 additions & 5 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@ inline static VALUE get_error_details_and_drop(ddog_Error *error) {
return result;
}

// Utility function to be able to easily raise a ddog_Error outside the GVL.
// The message will look like 'Libstreaming error while <while_context>: <error.msg>'
NORETURN(
void grab_gvl_and_raise_ddogerr_and_drop(const char *while_context, ddog_Error *error);
);
// Utility function to be able to extract an error cstring from a ddog_Error.
// Returns the number of characters written to string (which is necessarily
// bounded by capacity - 1 since the string will be null-terminated).
size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity);

// Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements.
// In practice, there's a few types that the profiler will probably never encounter, but I've added all entries of
Expand Down
72 changes: 39 additions & 33 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time);
static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint);
static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */);
static VALUE _native_track_obj_allocation(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight);
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight);
static VALUE _native_record_obj_free(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE obj);


Expand All @@ -247,7 +247,7 @@ void stack_recorder_init(VALUE profiling_module) {
rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1);
rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1);
rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3);
rb_define_singleton_method(testing_module, "_native_track_obj_allocation", _native_track_obj_allocation, 3);
rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3);
rb_define_singleton_method(testing_module, "_native_record_obj_free", _native_record_obj_free, 2);

ok_symbol = ID2SYM(rb_intern_const("ok"));
Expand Down Expand Up @@ -283,6 +283,10 @@ static VALUE _native_new(VALUE klass) {

VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state);

// NOTE: We initialize this because we want a new recorder to be operational even without initialization and our
// default is everything enabled. However, if during recording initialization it turns out we don't want
// heap samples, we will free and reset heap_recorder to NULL, effectively disabling all behaviour specific
// to heap profiling (all calls to heap_recorder_* with a NULL heap recorder are noops).
state->heap_recorder = heap_recorder_new();

// Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking!
Expand Down Expand Up @@ -415,15 +419,9 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
// Need to do this while still holding on to the Global VM Lock; see comments on method for why
serializer_set_start_timestamp_for_next_profile(state, finish_timestamp);

if (state->heap_recorder != NULL) {
// Flush any pending data in the heap recorder prior to doing the iteration during serialization.
// This needs to happen while holding on to the GVL
// TODO: Can this deadlock with the sampler thread due to GVL or Ruby specific things?
// 1. Sampler is in the middle of recording a heap allocation.
// 2. Recorder gets scheduled and tries to acquire the lock, waiting if needed.
// 3. Are we guaranteed that the sampler can make progress?
heap_recorder_flush(state->heap_recorder);
}
// Flush any pending data in the heap recorder prior to doing the iteration during serialization.
// This needs to happen while holding on to the GVL
heap_recorder_flush(state->heap_recorder);

// We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this
// is pending
Expand Down Expand Up @@ -486,9 +484,8 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns;
metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;

if (values.alloc_samples != 0 && state->heap_recorder != NULL) {
// If we got an allocation sample and our heap recorder is initialized,
// end the heap allocation recording to commit the heap sample.
if (values.alloc_samples != 0) {
// If we got an allocation sample end the heap allocation recording to commit the heap sample.
// FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
// very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
// be fixed with some refactoring but for now this leads to a less impactful change.
Expand All @@ -515,10 +512,6 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) {
struct stack_recorder_state *state;
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
if (state->heap_recorder == NULL) {
// we aren't gathering heap data so we can ignore this
return;
}
// FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
// very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
// be fixed with some refactoring but for now this leads to a less impactful change.
Expand All @@ -529,10 +522,6 @@ void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample
void record_obj_free(VALUE recorder_instance, VALUE freed_object) {
struct stack_recorder_state *state;
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
if (state->heap_recorder == NULL) {
// we aren't gathering heap data so we can ignore this
return;
}
record_heap_free(state->heap_recorder, freed_object);
}

Expand All @@ -551,14 +540,18 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
}
}

// Maximum size (including the terminating '\0') of an error message captured during heap
// recorder iteration.
#define MAX_LEN_HEAP_ITERATION_ERROR_MSG 256

// Heap recorder iteration context allows us access to stack recorder state and profile being serialized
// during iteration of heap recorder live objects.
typedef struct heap_recorder_iteration_context {
  struct stack_recorder_state *state;
  ddog_prof_Profile *profile;
  // Set to true by the iteration callback when adding a heap sample to the profile failed.
  bool error;
  // Null-terminated description of the failure; only meaningful when error is true.
  char errorMsg[MAX_LEN_HEAP_ITERATION_ERROR_MSG];
} heap_recorder_iteration_context;

static void add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) {
static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) {
heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg;

int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0};
Expand All @@ -577,28 +570,42 @@ static void add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
);

if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
grab_gvl_and_raise_ddogerr_and_drop("recording heap sample", &result.err);
read_ddogerr_string_and_drop(&result.err, context->errorMsg, MAX_LEN_HEAP_ITERATION_ERROR_MSG);
context->error = true;
// By returning false we cancel the iteration
return false;
}

// Keep on iterating to next item!
return true;
}

// Fill `profile` with one sample per live object currently tracked by the heap recorder.
// Called from the serialization path while NOT holding the GVL.
static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) {
  heap_recorder_iteration_context context = {
    .state = state,
    .profile = profile,
    .error = false,
    .errorMsg = {0},
  };

  heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &context, false);

  if (!context.error) {
    return;
  }

  // Raising requires grabbing the GVL, and during iteration we first grab the records_mutex.
  // Sampling acquires these in the opposite order (GVL first, then possibly records_mutex), so
  // raising from inside the iteration could deadlock. By only raising here, once iteration is
  // done and records_mutex is no longer held, we avoid that different-lock-acquisition-order
  // situation.
  grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", context.errorMsg);
}

static void *call_serialize_without_gvl(void *call_args) {
struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args;

args->profile = serializer_flip_active_and_inactive_slots(args->state);

if (args->state->heap_recorder != NULL) {
// Now that we have the inactive profile with all but heap samples, lets fill it with heap data
// without needing to race with the active sampler
build_heap_profile_without_gvl(args->state, args->profile);
}
// Now that we have the inactive profile with all but heap samples, lets fill it with heap data
// without needing to race with the active sampler
build_heap_profile_without_gvl(args->state, args->profile);

// Note: The profile gets reset by the serialize call
args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */);
Expand Down Expand Up @@ -721,8 +728,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
reset_profile(&state->slot_one_profile, /* start_time: */ NULL);
reset_profile(&state->slot_two_profile, /* start_time: */ NULL);

// After a fork, all tracked live objects should still be alive in the child process so we purposefully DON'T
// reset the heap recorder. The live heap it is tracking is still alive after all.
heap_recorder_after_fork(state->heap_recorder);

return Qtrue;
}
Expand All @@ -741,7 +747,7 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_
return Qtrue;
}

static VALUE _native_track_obj_allocation(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) {
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) {
ENFORCE_TYPE(weight, T_FIXNUM);
track_object(recorder_instance, new_obj, NUM2UINT(weight));
return Qtrue;
Expand Down
4 changes: 1 addition & 3 deletions lib/datadog/core/configuration/settings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def initialize(*_)
# in summary, this should be supported on Ruby 2.x, 3.1.4+, 3.2.3+ and 3.3.0+. Enabling it on
# unsupported Rubies may result in unexpected behaviour, including crashes.
#
# @note Heap profiles are not yet GA in the Datadog UI, get in touch if you want to help us test it.
# @note Allocation profiles are not yet GA in the Datadog UI, get in touch if you want to help us test it.
#
# @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` environment variable as a boolean, otherwise `false`
option :experimental_allocation_enabled do |o|
Expand Down Expand Up @@ -361,8 +361,6 @@ def initialize(*_)
# The lower the value, the more accuracy in allocation and heap tracking but the bigger the overhead. In
# particular, a value of 1 will sample ALL allocations.
#
# This feature is not supported in all Rubies. Refer to {Datadog::Profiling::Ext::IS_ALLOC_SAMPLING_SUPPORTED}
#
# @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE` environment variable, otherwise `50`.
option :experimental_allocation_sample_rate do |o|
o.type :int
Expand Down
Loading

0 comments on commit 0cb2a47

Please sign in to comment.