diff --git a/docs-requirements.txt b/docs-requirements.txt
index b641eb6a27a..64df058dc11 100644
--- a/docs-requirements.txt
+++ b/docs-requirements.txt
@@ -20,7 +20,4 @@ sqlalchemy>=1.0
 thrift>=0.10.0
 wrapt>=1.0.0,<2.0.0
 psutil~=5.7.0
-google-cloud-core >=1.3.0
-google-api-core >=1.17.0
-google-cloud-trace >=0.23.0
-grpcio >=1.28.1
\ No newline at end of file
+google-cloud-trace >=0.23.0
\ No newline at end of file
diff --git a/ext/opentelemetry-exporter-cloud-trace/src/opentelemetry/exporter/cloud_trace/__init__.py b/ext/opentelemetry-exporter-cloud-trace/src/opentelemetry/exporter/cloud_trace/__init__.py
index 3ee1cf88f04..b97a964cbfb 100644
--- a/ext/opentelemetry-exporter-cloud-trace/src/opentelemetry/exporter/cloud_trace/__init__.py
+++ b/ext/opentelemetry-exporter-cloud-trace/src/opentelemetry/exporter/cloud_trace/__init__.py
@@ -53,10 +53,17 @@
 import opentelemetry.trace as trace_api
 from opentelemetry.sdk.trace import Event
 from opentelemetry.sdk.trace.export import Span, SpanExporter, SpanExportResult
+from opentelemetry.sdk.util import BoundedDict
 from opentelemetry.util import types
 
 logger = logging.getLogger(__name__)
 
+MAX_NUM_LINKS = 128
+MAX_NUM_EVENTS = 32
+MAX_EVENT_ATTRS = 4
+MAX_LINK_ATTRS = 32
+MAX_SPAN_ATTRS = 32
+
 
 class CloudTraceSpanExporter(SpanExporter):
     """Cloud Trace span exporter for OpenTelemetry.
@@ -129,7 +136,11 @@ def _translate_to_cloud_trace(
             start_time = _get_time_from_ns(span.start_time)
             end_time = _get_time_from_ns(span.end_time)
 
-            attributes = _extract_attributes(span.attributes)
+            if len(span.attributes) > MAX_SPAN_ATTRS:
+                logger.warning(
+                    "Span has more than %s attributes, some will be truncated",
+                    MAX_SPAN_ATTRS,
+                )
 
             cloud_trace_spans.append(
                 {
@@ -141,7 +152,9 @@
                     "start_time": start_time,
                     "end_time": end_time,
                     "parent_span_id": parent_id,
-                    "attributes": attributes,
+                    "attributes": _extract_attributes(
+                        span.attributes, MAX_SPAN_ATTRS
+                    ),
                     "links": _extract_links(span.links),
                     "status": _extract_status(span.status),
                     "time_events": _extract_events(span.events),
@@ -185,7 +198,9 @@ def _get_truncatable_str_object(str_to_convert: str, max_length: int):
 
 def _truncate_str(str_to_check: str, limit: int) -> Tuple[str, int]:
     """Check the length of a string. If exceeds limit, then truncate it."""
-    return str_to_check[:limit], max(0, len(str_to_check) - limit)
+    encoded = str_to_check.encode("utf-8")
+    truncated_str = encoded[:limit].decode("utf-8", errors="ignore")
+    return truncated_str, len(encoded) - len(truncated_str.encode("utf-8"))
 
 
 def _extract_status(status: trace_api.Status) -> Status:
@@ -205,7 +220,20 @@
     if not links:
         return None
     extracted_links = []
+    dropped_links = 0
+    if len(links) > MAX_NUM_LINKS:
+        logger.warning(
+            "Exporting more than %s links, some will be truncated",
+            MAX_NUM_LINKS,
+        )
+        dropped_links = len(links) - MAX_NUM_LINKS
+        links = links[:MAX_NUM_LINKS]
     for link in links:
+        if len(link.attributes) > MAX_LINK_ATTRS:
+            logger.warning(
+                "Link has more than %s attributes, some will be truncated",
+                MAX_LINK_ATTRS,
+            )
         trace_id = _get_hexadecimal_trace_id(link.context.trace_id)
         span_id = _get_hexadecimal_span_id(link.context.span_id)
         extracted_links.append(
@@ -213,10 +241,14 @@ def _extract_links(links: Sequence[trace_api.Link]) -> ProtoSpan.Links:
                 "trace_id": trace_id,
                 "span_id": span_id,
                 "type": "TYPE_UNSPECIFIED",
-                "attributes": _extract_attributes(link.attributes),
+                "attributes": _extract_attributes(
+                    link.attributes, MAX_LINK_ATTRS
+                ),
             }
         )
-    return ProtoSpan.Links(link=extracted_links, dropped_links_count=0)
+    return ProtoSpan.Links(
+        link=extracted_links, dropped_links_count=dropped_links
+    )
 
 
 def _extract_events(events: Sequence[Event]) -> ProtoSpan.TimeEvents:
@@ -224,11 +256,20 @@
     if not events:
         return None
     logs = []
+    dropped_annotations = 0
+    if len(events) > MAX_NUM_EVENTS:
+        logger.warning(
+            "Exporting more than %s annotations, some will be truncated",
+            MAX_NUM_EVENTS,
+        )
+        dropped_annotations = len(events) - MAX_NUM_EVENTS
+        events = events[:MAX_NUM_EVENTS]
     for event in events:
-        if len(event.attributes) > 4:
+        if len(event.attributes) > MAX_EVENT_ATTRS:
             logger.warning(
-                "Event %s has more then 4 attributes, some will be truncated",
+                "Event %s has more than %s attributes, some will be truncated",
                 event.name,
+                MAX_EVENT_ATTRS,
             )
         logs.append(
             {
@@ -237,20 +278,24 @@
                     "description": _get_truncatable_str_object(
                         event.name, 256
                     ),
-                    "attributes": _extract_attributes(event.attributes),
+                    "attributes": _extract_attributes(
+                        event.attributes, MAX_EVENT_ATTRS
+                    ),
                 },
             }
         )
     return ProtoSpan.TimeEvents(
         time_event=logs,
-        dropped_annotations_count=0,
+        dropped_annotations_count=dropped_annotations,
         dropped_message_events_count=0,
     )
 
 
-def _extract_attributes(attrs: types.Attributes) -> ProtoSpan.Attributes:
+def _extract_attributes(
+    attrs: types.Attributes, num_attrs_limit: int
+) -> ProtoSpan.Attributes:
     """Convert span.attributes to dict."""
-    attributes_dict = {}
+    attributes_dict = BoundedDict(num_attrs_limit)
 
     for key, value in attrs.items():
         key = _truncate_str(key, 128)[0]
@@ -258,7 +303,10 @@
         if value is not None:
             attributes_dict[key] = value
 
-    return ProtoSpan.Attributes(attribute_map=attributes_dict)
+    return ProtoSpan.Attributes(
+        attribute_map=attributes_dict,
+        dropped_attributes_count=len(attrs) - len(attributes_dict),
+    )
 
 
 def _format_attribute_value(value: types.AttributeValue) -> AttributeValue:
diff --git a/ext/opentelemetry-exporter-cloud-trace/tests/test_cloud_trace_exporter.py b/ext/opentelemetry-exporter-cloud-trace/tests/test_cloud_trace_exporter.py
index 18745d6b2dc..5ebd5f3b649 100644
--- a/ext/opentelemetry-exporter-cloud-trace/tests/test_cloud_trace_exporter.py
+++ b/ext/opentelemetry-exporter-cloud-trace/tests/test_cloud_trace_exporter.py
@@ -21,6 +21,10 @@
 from google.rpc.status_pb2 import Status
 
 from opentelemetry.exporter.cloud_trace import (
+    MAX_EVENT_ATTRS,
+    MAX_LINK_ATTRS,
+    MAX_NUM_EVENTS,
+    MAX_NUM_LINKS,
     CloudTraceSpanExporter,
     _extract_attributes,
     _extract_events,
@@ -47,7 +51,6 @@ def setUp(self):
             "bool_key": False,
             "double_key": 1.421,
             "int_key": 123,
-            "int_key2": 1234,
         }
         self.extracted_attributes_variety_pack = ProtoSpan.Attributes(
             attribute_map={
@@ -63,7 +66,6 @@
                     )
                 ),
                 "int_key": AttributeValue(int_value=123),
-                "int_key2": AttributeValue(int_value=1234),
             }
         )
 
@@ -141,18 +143,24 @@ def test_extract_status(self):
 
     def test_extract_attributes(self):
         self.assertEqual(
-            _extract_attributes({}), ProtoSpan.Attributes(attribute_map={})
+            _extract_attributes({}, 4), ProtoSpan.Attributes(attribute_map={})
         )
         self.assertEqual(
-            _extract_attributes(self.attributes_variety_pack),
+            _extract_attributes(self.attributes_variety_pack, 4),
             self.extracted_attributes_variety_pack,
         )
         # Test ignoring attributes with illegal value type
         self.assertEqual(
-            _extract_attributes({"illegal_attribute_value": dict()}),
-            ProtoSpan.Attributes(attribute_map={}),
+            _extract_attributes({"illegal_attribute_value": dict()}, 4),
+            ProtoSpan.Attributes(attribute_map={}, dropped_attributes_count=1),
         )
+        too_many_attrs = {}
+        for attr_key in range(5):
+            too_many_attrs[str(attr_key)] = 0
+        proto_attrs = _extract_attributes(too_many_attrs, 4)
+        self.assertEqual(proto_attrs.dropped_attributes_count, 1)
+
 
     def test_extract_events(self):
         self.assertIsNone(_extract_events([]))
         time_in_ns1 = 1589919268850900051
@@ -189,7 +197,7 @@ def test_extract_events(self):
                         value="event2", truncated_byte_count=0
                     ),
                     "attributes": ProtoSpan.Attributes(
-                        attribute_map={}
+                        attribute_map={}, dropped_attributes_count=1
                     ),
                 },
             },
@@ -249,20 +257,27 @@ def test_extract_links(self):
                         "attributes": {
                             "attribute_map": {
                                 "int_attr_value": AttributeValue(int_value=123)
-                            }
+                            },
+                            "dropped_attributes_count": 1,
                         },
                     },
                 ]
             ),
         )
 
-    def test_truncate_string(self):
+    # pylint:disable=too-many-locals
+    def test_truncate(self):
+        """Cloud Trace API imposes limits on the length of many things,
+        e.g. strings, number of events, number of attributes. We truncate
+        these things before sending them to the API as an optimization.
+        """
         str_300 = "a" * 300
         str_256 = "a" * 256
         str_128 = "a" * 128
         self.assertEqual(_truncate_str("aaaa", 1), ("a", 3))
         self.assertEqual(_truncate_str("aaaa", 5), ("aaaa", 0))
         self.assertEqual(_truncate_str("aaaa", 4), ("aaaa", 0))
+        self.assertEqual(_truncate_str("中文翻译", 4), ("中", 9))
 
         self.assertEqual(
             _format_attribute_value(str_300),
@@ -272,8 +287,9 @@ def test_truncate_string(self):
                 )
             ),
         )
+
         self.assertEqual(
-            _extract_attributes({str_300: str_300}),
+            _extract_attributes({str_300: str_300}, 4),
             ProtoSpan.Attributes(
                 attribute_map={
                     str_128: AttributeValue(
@@ -304,3 +320,82 @@ def test_truncate_string(self):
                 ]
             ),
         )
+
+        trace_id = "6e0c63257de34c92bf9efcd03927272e"
+        span_id = "95bb5edabd45950f"
+        link = Link(
+            context=SpanContext(
+                trace_id=int(trace_id, 16),
+                span_id=int(span_id, 16),
+                is_remote=False,
+            ),
+            attributes={},
+        )
+        too_many_links = [link] * (MAX_NUM_LINKS + 1)
+        self.assertEqual(
+            _extract_links(too_many_links),
+            ProtoSpan.Links(
+                link=[
+                    {
+                        "trace_id": trace_id,
+                        "span_id": span_id,
+                        "type": "TYPE_UNSPECIFIED",
+                        "attributes": {},
+                    }
+                ]
+                * MAX_NUM_LINKS,
+                dropped_links_count=len(too_many_links) - MAX_NUM_LINKS,
+            ),
+        )
+
+        link_attrs = {}
+        for attr_key in range(MAX_LINK_ATTRS + 1):
+            link_attrs[str(attr_key)] = 0
+        attr_link = Link(
+            context=SpanContext(
+                trace_id=int(trace_id, 16),
+                span_id=int(span_id, 16),
+                is_remote=False,
+            ),
+            attributes=link_attrs,
+        )
+
+        proto_link = _extract_links([attr_link])
+        self.assertEqual(
+            len(proto_link.link[0].attributes.attribute_map), MAX_LINK_ATTRS
+        )
+
+        too_many_events = [event1] * (MAX_NUM_EVENTS + 1)
+        self.assertEqual(
+            _extract_events(too_many_events),
+            ProtoSpan.TimeEvents(
+                time_event=[
+                    {
+                        "time": time_in_ms_and_ns1,
+                        "annotation": {
+                            "description": TruncatableString(
+                                value=str_256, truncated_byte_count=300 - 256
+                            ),
+                            "attributes": {},
+                        },
+                    },
+                ]
+                * MAX_NUM_EVENTS,
+                dropped_annotations_count=len(too_many_events)
+                - MAX_NUM_EVENTS,
+            ),
+        )
+
+        time_in_ns1 = 1589919268850900051
+        event_attrs = {}
+        for attr_key in range(MAX_EVENT_ATTRS + 1):
+            event_attrs[str(attr_key)] = 0
+        proto_events = _extract_events(
+            [Event(name="a", attributes=event_attrs, timestamp=time_in_ns1)]
+        )
+        self.assertEqual(
+            len(
+                proto_events.time_event[0].annotation.attributes.attribute_map
+            ),
+            MAX_EVENT_ATTRS,
+        )