Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance AWS SDK Instrumentation with Detailed HTTP Error Information #9448

Merged
merged 5 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions instrumentation/aws-sdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ For more information, see the respective public setters in the `AwsSdkTelemetryB
- [SDK v1](./aws-sdk-1.11/library/src/main/java/io/opentelemetry/instrumentation/awssdk/v1_11/AwsSdkTelemetryBuilder.java)
- [SDK v2](./aws-sdk-2.2/library/src/main/java/io/opentelemetry/instrumentation/awssdk/v2_2/AwsSdkTelemetryBuilder.java)

| System property | Type | Default | Description |
| ------------------------------------------------------------------------ | ------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------- |
| `otel.instrumentation.aws-sdk.experimental-span-attributes` | Boolean | `false` | Enable the capture of experimental span attributes. |
| System property | Type | Default | Description |
|--------------------------------------------------------------------------| ------- | ------- |---------------------------------------------------------------------------------------------|
| `otel.instrumentation.aws-sdk.experimental-span-attributes` | Boolean | `false` | Enable the capture of experimental span attributes. |
| `otel.instrumentation.aws-sdk.experimental-use-propagator-for-messaging` | Boolean | `false` | v2 only, inject into SNS/SQS attributes with configured propagator: See [v2 README](aws-sdk-2.2/library/README.md#trace-propagation). |
| `otel.instrumentation.aws-sdk.experimental-record-individual-http-error` | Boolean | `false` | v2 only, record errors returned by each individual HTTP request as events for the SDK span. |
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ muzzle {
// Used by all SDK services, the only case it isn't is an SDK extension such as a custom HTTP
// client, which is not target of instrumentation anyways.
extraDependency("software.amazon.awssdk:protocol-core")

excludeInstrumentationName("aws-sdk-2.2-sqs")
excludeInstrumentationName("aws-sdk-2.2-sns")

Expand Down Expand Up @@ -95,6 +96,7 @@ testing {
} else {
implementation("software.amazon.awssdk:s3:2.10.12")
}
implementation(project(":instrumentation:aws-sdk:aws-sdk-2.2:library"))
}
}
}
Expand All @@ -115,6 +117,7 @@ tasks {
withType<Test>().configureEach {
// TODO run tests both with and without experimental span attributes
systemProperty("otel.instrumentation.aws-sdk.experimental-span-attributes", "true")
systemProperty("otel.instrumentation.aws-sdk.experimental-record-individual-http-error", "true")
}

withType<com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar>().configureEach {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

import io.opentelemetry.instrumentation.awssdk.v2_2.AbstractAws2ClientRecordHttpErrorTest;
import io.opentelemetry.instrumentation.testing.junit.AgentInstrumentationExtension;
import io.opentelemetry.instrumentation.testing.junit.InstrumentationExtension;
import org.junit.jupiter.api.extension.RegisterExtension;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;

public class Aws2ClientRecordHttpErrorTest extends AbstractAws2ClientRecordHttpErrorTest {
@RegisterExtension
private final AgentInstrumentationExtension testing = AgentInstrumentationExtension.create();

@Override
public ClientOverrideConfiguration.Builder createOverrideConfigurationBuilder() {
return ClientOverrideConfiguration.builder();
}

@Override
protected InstrumentationExtension getTesting() {
return testing;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,6 @@ tasks {
test {
systemProperty("otel.instrumentation.aws-sdk.experimental-span-attributes", true)
systemProperty("otel.instrumentation.aws-sdk.experimental-use-propagator-for-messaging", true)
systemProperty("otel.instrumentation.aws-sdk.experimental-record-individual-http-error", true)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,15 @@ public class TracingExecutionInterceptor implements ExecutionInterceptor {
ConfigPropertiesUtil.getBoolean(
"otel.instrumentation.aws-sdk.experimental-use-propagator-for-messaging", false);

private static final boolean RECORD_INDIVIDUAL_HTTP_ERROR =
ConfigPropertiesUtil.getBoolean(
"otel.instrumentation.aws-sdk.experimental-record-individual-http-error", false);

private final ExecutionInterceptor delegate =
AwsSdkTelemetry.builder(GlobalOpenTelemetry.get())
.setCaptureExperimentalSpanAttributes(CAPTURE_EXPERIMENTAL_SPAN_ATTRIBUTES)
.setUseConfiguredPropagatorForMessaging(USE_MESSAGING_PROPAGATOR)
.setRecordIndividualHttpError(RECORD_INDIVIDUAL_HTTP_ERROR)
.build()
.newExecutionInterceptor();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,14 @@ public static AwsSdkTelemetryBuilder builder(OpenTelemetry openTelemetry) {
private final boolean captureExperimentalSpanAttributes;
@Nullable private final TextMapPropagator messagingPropagator;
private final boolean useXrayPropagator;
private final boolean recordIndividualHttpError;

AwsSdkTelemetry(
OpenTelemetry openTelemetry,
boolean captureExperimentalSpanAttributes,
boolean useMessagingPropagator,
boolean useXrayPropagator) {
boolean useXrayPropagator,
boolean recordIndividualHttpError) {
this.useXrayPropagator = useXrayPropagator;
this.requestInstrumenter =
AwsSdkInstrumenterFactory.requestInstrumenter(
Expand All @@ -62,6 +64,7 @@ public static AwsSdkTelemetryBuilder builder(OpenTelemetry openTelemetry) {
this.captureExperimentalSpanAttributes = captureExperimentalSpanAttributes;
this.messagingPropagator =
useMessagingPropagator ? openTelemetry.getPropagators().getTextMapPropagator() : null;
this.recordIndividualHttpError = recordIndividualHttpError;
}

/**
Expand All @@ -74,6 +77,7 @@ public ExecutionInterceptor newExecutionInterceptor() {
consumerInstrumenter,
captureExperimentalSpanAttributes,
messagingPropagator,
useXrayPropagator);
useXrayPropagator,
recordIndividualHttpError);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ public final class AwsSdkTelemetryBuilder {

private boolean useMessagingPropagator;

private boolean recordIndividualHttpError;

private boolean useXrayPropagator = true;

AwsSdkTelemetryBuilder(OpenTelemetry openTelemetry) {
Expand Down Expand Up @@ -57,6 +59,20 @@ public AwsSdkTelemetryBuilder setUseConfiguredPropagatorForMessaging(
return this;
}

/**
* Sets whether errors returned by each individual HTTP request should be recorded as events for
* the SDK span.
*
* <p>This option is off by default. If enabled, the HTTP error code and the error message will be
* captured and associated with the span. This provides detailed insights into errors on a
* per-request basis.
*/
@CanIgnoreReturnValue
public AwsSdkTelemetryBuilder setRecordIndividualHttpError(boolean recordIndividualHttpError) {
this.recordIndividualHttpError = recordIndividualHttpError;
return this;
}

/**
* This setter implemented package-private for testing the messaging propagator, it does not seem
* too useful in general. The option is on by default.
Expand All @@ -79,6 +95,7 @@ public AwsSdkTelemetry build() {
openTelemetry,
captureExperimentalSpanAttributes,
useMessagingPropagator,
useXrayPropagator);
useXrayPropagator,
recordIndividualHttpError);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import static io.opentelemetry.instrumentation.awssdk.v2_2.AwsSdkRequestType.DYNAMODB;

import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.api.trace.Span;
Expand All @@ -15,7 +16,14 @@
import io.opentelemetry.contrib.awsxray.propagator.AwsXrayPropagator;
import io.opentelemetry.instrumentation.api.instrumenter.Instrumenter;
import io.opentelemetry.semconv.SemanticAttributes;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import software.amazon.awssdk.auth.signer.AwsSignerExecutionAttribute;
import software.amazon.awssdk.awscore.AwsResponse;
Expand Down Expand Up @@ -50,6 +58,10 @@ final class TracingExecutionInterceptor implements ExecutionInterceptor {
private final Instrumenter<ExecutionAttributes, SdkHttpResponse> consumerInstrumenter;
private final boolean captureExperimentalSpanAttributes;

static final AttributeKey<String> HTTP_ERROR_MSG =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any plans to make this attribute part of the semantic conventions for the aws sdk?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is in the long term plan. Actually I am not quite sure which part of semantic conventions that I should put into because this is the event attribute not the span attribute (the previous link that you give seems to be the conventions for span attributes). Any suggestions?

AttributeKey.stringKey("aws.http.error_message");
static final String HTTP_FAILURE_EVENT = "HTTP request failure";

Instrumenter<ExecutionAttributes, SdkHttpResponse> getConsumerInstrumenter() {
return consumerInstrumenter;
}
Expand All @@ -65,19 +77,22 @@ boolean shouldUseXrayPropagator() {

@Nullable private final TextMapPropagator messagingPropagator;
private final boolean useXrayPropagator;
private final boolean recordIndividualHttpError;
private final FieldMapper fieldMapper;

TracingExecutionInterceptor(
Instrumenter<ExecutionAttributes, SdkHttpResponse> requestInstrumenter,
Instrumenter<ExecutionAttributes, SdkHttpResponse> consumerInstrumenter,
boolean captureExperimentalSpanAttributes,
TextMapPropagator messagingPropagator,
boolean useXrayPropagator) {
boolean useXrayPropagator,
boolean recordIndividualHttpError) {
this.requestInstrumenter = requestInstrumenter;
this.consumerInstrumenter = consumerInstrumenter;
this.captureExperimentalSpanAttributes = captureExperimentalSpanAttributes;
this.messagingPropagator = messagingPropagator;
this.useXrayPropagator = useXrayPropagator;
this.recordIndividualHttpError = recordIndividualHttpError;
this.fieldMapper = new FieldMapper();
}

Expand Down Expand Up @@ -222,6 +237,19 @@ public SdkHttpRequest modifyHttpRequest(
return builder.build();
}

@Override
public Optional<InputStream> modifyHttpResponseContent(
Context.ModifyHttpResponse context, ExecutionAttributes executionAttributes) {
Optional<InputStream> responseBody = context.responseBody();
if (recordIndividualHttpError) {
String errorMsg = extractHttpErrorAsEvent(context, executionAttributes);
if (errorMsg != null) {
return Optional.of(new ByteArrayInputStream(errorMsg.getBytes(Charset.defaultCharset())));
}
}
return responseBody;
}

private void populateRequestAttributes(
Span span,
AwsSdkRequest awsSdkRequest,
Expand Down Expand Up @@ -289,6 +317,37 @@ private void onSdkResponse(
}
}

private static String extractHttpErrorAsEvent(
Context.AfterTransmission context, ExecutionAttributes executionAttributes) {
io.opentelemetry.context.Context otelContext = getContext(executionAttributes);
if (otelContext != null) {
Span span = Span.fromContext(otelContext);
SdkHttpResponse response = context.httpResponse();

if (response != null && !response.isSuccessful()) {
int errorCode = response.statusCode();
// we want to record the error message from http response
Optional<InputStream> responseBody = context.responseBody();
if (responseBody.isPresent()) {
String errorMsg =
new BufferedReader(
new InputStreamReader(responseBody.get(), Charset.defaultCharset()))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the underlying InputStream still available for use by the SDK after calling responseBody.get()?

Copy link
Contributor Author

@pxaws pxaws Sep 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for catching this!

I think there is no guarantee that the InputStream will support mark()/reset() operations or seeking. So the next time if the same InputStream is read, it will not give you anything.

To verify, I did an simple experiment. Based on the pipeline stage in aws java sdk: https://github.com/aws/aws-sdk-java-v2/blob/d020d37138eee9d4d74e814086143d26d923fee0/core/sdk-core/src/main/java/software/amazon/awssdk/core/internal/http/pipeline/stages/AfterTransmissionExecutionInterceptorsStage.java#L43-L46, it seems like modifyHttpResponse call is after afterTransmission call. So I tested the same InputStream in modifyHttpResponse hooks in ExecutionInterceptor and confirmed it's already empty (after it's read out in afterTransmission).

It's a valid concern that the inputStream for the response body will become useless after we read it out in afterTransmission hook. Fortunately there is another hook Optional<InputStream> modifyHttpResponseContent which allow us to modify the content of a response. We can copy the content out and generate a new InputStream from it. A similar approach has already been implemented in the aws java adk: https://github.com/aws/aws-sdk-java-v2/blob/d020d37138eee9d4d74e814086143d26d923fee0/core/sdk-core/src/main/java/software/amazon/awssdk/core/internal/interceptor/HttpChecksumValidationInterceptor.java#L55-L73

I will make the change in next commit.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @pxaws

Is there a way to verify in the test that the InputStream is available to the client and has the expected value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I modified the test to do that. I basically defined another execution interceptor which is registered with AWS SDK after the TracingExecutionInterceptor used for SDK instrumentation and then verify from there to make sure the response body contains the expected content. Please see the next commit. Thank you!

.lines()
.collect(Collectors.joining("\n"));
Attributes attributes =
Attributes.of(
SemanticAttributes.HTTP_RESPONSE_STATUS_CODE,
Long.valueOf(errorCode),
HTTP_ERROR_MSG,
errorMsg);
span.addEvent(HTTP_FAILURE_EVENT, attributes);
return errorMsg;
}
}
}
return null;
}

@Override
public void onExecutionFailure(
Context.FailedExecution context, ExecutionAttributes executionAttributes) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.instrumentation.awssdk.v2_2;

import io.opentelemetry.instrumentation.testing.junit.InstrumentationExtension;
import io.opentelemetry.instrumentation.testing.junit.LibraryInstrumentationExtension;
import org.junit.jupiter.api.extension.RegisterExtension;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;

public class Aws2ClientNotRecordHttpErrorTest extends AbstractAws2ClientRecordHttpErrorTest {
@RegisterExtension
public final LibraryInstrumentationExtension testing = LibraryInstrumentationExtension.create();

@Override
public ClientOverrideConfiguration.Builder createOverrideConfigurationBuilder() {
return ClientOverrideConfiguration.builder()
.addExecutionInterceptor(
AwsSdkTelemetry.builder(testing.getOpenTelemetry())
.setCaptureExperimentalSpanAttributes(true)
.setRecordIndividualHttpError(isRecordIndividualHttpErrorEnabled())
.build()
.newExecutionInterceptor());
}

@Override
public boolean isRecordIndividualHttpErrorEnabled() {
return false;
}

@Override
protected InstrumentationExtension getTesting() {
return testing;
}
}
Loading
Loading