From 44111840ad3e4150126f0d318f06ca2bfda68b81 Mon Sep 17 00:00:00 2001 From: Diego Hurtado Date: Mon, 18 Nov 2019 19:50:08 -0600 Subject: [PATCH] Set status for ended spans Fixes #292 --- .../src/opentelemetry/ext/pymongo/__init__.py | 8 +- .../tests/test_pymongo_integration.py | 9 +- .../src/opentelemetry/trace/__init__.py | 33 +- .../src/opentelemetry/trace/status.py | 366 ++++++++++-------- .../src/opentelemetry/sdk/trace/__init__.py | 12 +- opentelemetry-sdk/tests/trace/test_trace.py | 24 +- tox.ini | 2 +- 7 files changed, 274 insertions(+), 180 deletions(-) diff --git a/ext/opentelemetry-ext-pymongo/src/opentelemetry/ext/pymongo/__init__.py b/ext/opentelemetry-ext-pymongo/src/opentelemetry/ext/pymongo/__init__.py index fa1cc1583e1..dfd1c06e0fa 100644 --- a/ext/opentelemetry-ext-pymongo/src/opentelemetry/ext/pymongo/__init__.py +++ b/ext/opentelemetry-ext-pymongo/src/opentelemetry/ext/pymongo/__init__.py @@ -20,7 +20,9 @@ from pymongo import monitoring from opentelemetry.trace import SpanKind -from opentelemetry.trace.status import Status, StatusCanonicalCode +from opentelemetry.trace.status import ( + Status, StatusCanonicalCode, OkStatus, UnknownStatus +) DATABASE_TYPE = "mongodb" COMMAND_ATTRIBUTES = ["filter", "sort", "skip", "limit", "pipeline"] @@ -82,7 +84,7 @@ def succeeded(self, event: monitoring.CommandSucceededEvent): span.set_attribute( "db.mongo.duration_micros", event.duration_micros ) - span.set_status(Status(StatusCanonicalCode.OK, event.reply)) + span.set_status(OkStatus()) span.end() self._remove_span(event) @@ -92,7 +94,7 @@ def failed(self, event: monitoring.CommandFailedEvent): span.set_attribute( "db.mongo.duration_micros", event.duration_micros ) - span.set_status(Status(StatusCanonicalCode.UNKNOWN, event.failure)) + span.set_status(UnknownStatus()) span.end() self._remove_span(event) diff --git a/ext/opentelemetry-ext-pymongo/tests/test_pymongo_integration.py b/ext/opentelemetry-ext-pymongo/tests/test_pymongo_integration.py index 95f0ae3413a..58012c5d427 100644 --- a/ext/opentelemetry-ext-pymongo/tests/test_pymongo_integration.py +++ b/ext/opentelemetry-ext-pymongo/tests/test_pymongo_integration.py @@ -82,7 +82,10 @@ def test_succeeded(self): self.assertIs( span.status.canonical_code, trace_api.status.StatusCanonicalCode.OK ) - self.assertEqual(span.status.description, "reply") + self.assertEqual( + span.status.description, + "Not an error, returned on success." + ) self.assertIsNotNone(span.end_time) def test_failed(self): @@ -100,7 +103,9 @@ def test_failed(self): span.status.canonical_code, trace_api.status.StatusCanonicalCode.UNKNOWN, ) - self.assertEqual(span.status.description, "failure") + self.assertEqual( + span.status.description, trace_api.status.UnknownStatus.__doc__ + ) self.assertIsNotNone(span.end_time) def test_multiple_commands(self): diff --git a/opentelemetry-api/src/opentelemetry/trace/__init__.py b/opentelemetry-api/src/opentelemetry/trace/__init__.py index 4f1dc539a39..3352f5f586a 100644 --- a/opentelemetry-api/src/opentelemetry/trace/__init__.py +++ b/opentelemetry-api/src/opentelemetry/trace/__init__.py @@ -66,7 +66,7 @@ import typing from contextlib import contextmanager -from opentelemetry.trace.status import Status +from opentelemetry.trace.status import Status, UnknownStatus from opentelemetry.util import loader, types # TODO: quarantine @@ -124,10 +124,12 @@ class SpanKind(enum.Enum): https://github.com/open-telemetry/opentelemetry-specification/pull/226. """ - #: Default value. Indicates that the span is used internally in the application. + #: Default value. Indicates that the span is used internally in the + # application. INTERNAL = 0 - #: Indicates that the span describes an operation that handles a remote request. + #: Indicates that the span describes an operation that handles a remote + # request. SERVER = 1 #: Indicates that the span describes a request to some remote service. @@ -144,9 +146,13 @@ class SpanKind(enum.Enum): CONSUMER = 4 -class Span: +class Span(object): """A span represents a single operation within a trace.""" + def __init__(self, auto_update_status=False): + # FIXME what is the standard to follow for documentation of attributes? + self._auto_update_status = auto_update_status + def start(self, start_time: typing.Optional[int] = None) -> None: """Sets the current time as the span's start time. @@ -237,8 +243,27 @@ def __exit__( exc_tb: typing.Optional[python_types.TracebackType], ) -> None: """Ends context manager and calls `end` on the `Span`.""" + self.end() + if ( + exc_val is not None and + self.status is None and + self._auto_update_status + ): + if isinstance(exc_val, Status): + self.set_status(exc_val.status) + + else: + self.set_status(UnknownStatus) + + # return False + # return True + + # FIXME https://docs.python.org/3.7/reference/datamodel.html?highlight=__exit__#object.__exit__ + # says that a boolean value should be returned. Is the lack of the + # return of a boolean value here intentional? + class TraceOptions(int): """A bitmask that represents options specific to the trace. diff --git a/opentelemetry-api/src/opentelemetry/trace/status.py b/opentelemetry-api/src/opentelemetry/trace/status.py index 4fc50b33e56..db07ebbae89 100644 --- a/opentelemetry-api/src/opentelemetry/trace/status.py +++ b/opentelemetry-api/src/opentelemetry/trace/status.py @@ -12,170 +12,184 @@ # See the License for the specific language governing permissions and # limitations under the License. -import enum -import typing - - -class StatusCanonicalCode(enum.Enum): - """Represents the canonical set of status codes of a finished Span.""" - - OK = 0 - """Not an error, returned on success.""" - - CANCELLED = 1 - """The operation was cancelled, typically by the caller.""" - - UNKNOWN = 2 - """Unknown error. - - For example, this error may be returned when a Status value received from - another address space belongs to an error space that is not known in this - address space. Also errors raised by APIs that do not return enough error - information may be converted to this error. - """ - - INVALID_ARGUMENT = 3 - """The client specified an invalid argument. - - Note that this differs from FAILED_PRECONDITION. INVALID_ARGUMENT indicates - arguments that are problematic regardless of the state of the system (e.g., - a malformed file name). - """ - - DEADLINE_EXCEEDED = 4 - """The deadline expired before the operation could complete. - - For operations that change the state of the system, this error may be - returned even if the operation has completed successfully. For example, a - successful response from a server could have been delayed long - """ - - NOT_FOUND = 5 - """Some requested entity (e.g., file or directory) was not found. - - Note to server developers: if a request is denied for an entire class of - users, such as gradual feature rollout or undocumented whitelist, NOT_FOUND - may be used. If a request is denied for some users within a class of users, - such as user-based access control, PERMISSION_DENIED must be used. - """ - - ALREADY_EXISTS = 6 - """The entity that a client attempted to create (e.g., file or directory) - already exists. - """ - - PERMISSION_DENIED = 7 - """The caller does not have permission to execute the specified operation. - - PERMISSION_DENIED must not be used for rejections caused by exhausting some - resource (use RESOURCE_EXHAUSTED instead for those errors). - PERMISSION_DENIED must not be used if the caller can not be identified (use - UNAUTHENTICATED instead for those errors). This error code does not imply - the request is valid or the requested entity exists or satisfies other - pre-conditions. - """ - - RESOURCE_EXHAUSTED = 8 - """Some resource has been exhausted, perhaps a per-user quota, or perhaps - the entire file system is out of space. - """ - - FAILED_PRECONDITION = 9 - """The operation was rejected because the system is not in a state required - for the operation's execution. - - For example, the directory to be deleted is non-empty, an rmdir operation - is applied to a non-directory, etc. Service implementors can use the - following guidelines to decide between FAILED_PRECONDITION, ABORTED, and - UNAVAILABLE: - - (a) Use UNAVAILABLE if the client can retry just the failing call. - (b) Use ABORTED if the client should retry at a higher level (e.g., - when a client-specified test-and-set fails, indicating the client - should restart a read-modify-write sequence). - (c) Use FAILED_PRECONDITION if the client should not retry until the - system state has been explicitly fixed. - - E.g., if an "rmdir" fails because the directory is non-empty, - FAILED_PRECONDITION should be returned since the client should not retry - unless the files are deleted from the directory. - """ - - ABORTED = 10 - """The operation was aborted, typically due to a concurrency issue such as a - sequencer check failure or transaction abort. - - See the guidelines above for deciding between FAILED_PRECONDITION, ABORTED, - and UNAVAILABLE. - """ - - OUT_OF_RANGE = 11 - """The operation was attempted past the valid range. - - E.g., seeking or reading past end-of-file. Unlike INVALID_ARGUMENT, this - error indicates a problem that may be fixed if the system state changes. - For example, a 32-bit file system will generate INVALID_ARGUMENT if asked - to read at an offset that is not in the range [0,2^32-1],but it will - generate OUT_OF_RANGE if asked to read from an offset past the current file - size. There is a fair bit of overlap between FAILED_PRECONDITION and - OUT_OF_RANGE. We recommend using OUT_OF_RANGE (the more specific error) - when it applies so that callers who are iterating through a space can - easily look for an OUT_OF_RANGE error to detect when they are done. - """ - - UNIMPLEMENTED = 12 - """The operation is not implemented or is not supported/enabled in this - service. - """ - - INTERNAL = 13 - """Internal errors. - - This means that some invariants expected by the underlying system have been - broken. This error code is reserved for serious errors. - """ - - UNAVAILABLE = 14 - """The service is currently unavailable. - - This is most likely a transient condition, which can be corrected by - retrying with a backoff. Note that it is not always safe to retry - non-idempotent operations. - """ - - DATA_LOSS = 15 - """Unrecoverable data loss or corruption.""" - - UNAUTHENTICATED = 16 - """The request does not have valid authentication credentials for the - operation. - """ - - -class Status: - """Represents the status of a finished Span. - - Args: - canonical_code: The canonical status code that describes the result - status of the operation. - description: An optional description of the status. - """ - - def __init__( - self, - canonical_code: "StatusCanonicalCode" = StatusCanonicalCode.OK, - description: typing.Optional[str] = None, - ): - self._canonical_code = canonical_code - self._description = description +from enum import Enum +from collections import OrderedDict +from itertools import count +from sys import modules +from typing import Optional + +_STATUS_META = OrderedDict([ + ( + "ok", + "Not an error, returned on success." + ), + ( + "cancelled", + "The operation was cancelled, typically by the caller." + ), + ( + "unknown", + """Unknown error. + + For example, this error may be returned when a Status value + received from another address space belongs to an error space that + is not known in this address space. Also errors raised by APIs that + do not return enough error information may be converted to this + error. + """ + ), + ( + "invalid argument", + """The client specified an invalid argument. + + Note that this differs from FAILED_PRECONDITION. INVALID_ARGUMENT + indicates arguments that are problematic regardless of the state of the + system (e.g., a malformed file name). + """ + ), + ( + "deadline exceeded", + """The deadline expired before the operation could complete. + + For operations that change the state of the system, this error may be + returned even if the operation has completed successfully. For example, + a successful response from a server could have been delayed long + """ + ), + ( + "not found", + """Some requested entity (e.g., file or directory) was not found. + + Note to server developers: if a request is denied for an entire class + of users, such as gradual feature rollout or undocumented whitelist, + NOT_FOUND may be used. If a request is denied for some users within a + class of users, such as user-based access control, PERMISSION_DENIED + must be used. + """ + ), + ( + "already exists", + "The entity that a client attempted to create (e.g., file" + " or directory) already exists." + ), + ( + "permission denied", + """The caller does not have permission to execute the specified operation. + + PERMISSION_DENIED must not be used for rejections caused by exhausting + some resource (use RESOURCE_EXHAUSTED instead for those errors). + PERMISSION_DENIED must not be used if the caller can not be identified + (use UNAUTHENTICATED instead for those errors). This error code does + not imply the request is valid or the requested entity exists or + satisfies other pre-conditions. + """ # noqa + ), + ( + "resource exhausted", + "Some resource has been exhausted, perhaps a per-user quota, or " + "perhaps the entire file system is out of space." + ), + ( + "failed precondition", + """The operation was rejected because the system is not in a state required for the operation's execution. + + For example, the directory to be deleted is non-empty, an rmdir + operation is applied to a non-directory, etc. Service implementors can + use the following guidelines to decide between FAILED_PRECONDITION, + ABORTED, and UNAVAILABLE: + + (a) Use UNAVAILABLE if the client can retry just the failing call. + (b) Use ABORTED if the client should retry at a higher level (e.g., + when a client-specified test-and-set fails, indicating the + client should restart a read-modify-write sequence). + (c) Use FAILED_PRECONDITION if the client should not retry until + the system state has been explicitly fixed. + + E.g., if an "rmdir" fails because the directory is non-empty, + FAILED_PRECONDITION should be returned since the client should not + retry unless the files are deleted from the directory. + """ # noqa + ), + ( + "aborted", + """The operation was aborted, typically due to a concurrency issue such as a sequencer check failure or transaction abort. + + See the guidelines above for deciding between FAILED_PRECONDITION, + ABORTED, and UNAVAILABLE. + """ # noqa + ), + ( + "out of range", + """The operation was attempted past the valid range. + + E.g., seeking or reading past end-of-file. Unlike INVALID_ARGUMENT, + this error indicates a problem that may be fixed if the system state + changes. For example, a 32-bit file system will generate + INVALID_ARGUMENT if asked to read at an offset that is not in the range + [0,2^32-1],but it will generate OUT_OF_RANGE if asked to read from an + offset past the current file size. There is a fair bit of overlap + between FAILED_PRECONDITION and OUT_OF_RANGE. We recommend using + OUT_OF_RANGE (the more specific error) when it applies so that callers + who are iterating through a space can easily look for an OUT_OF_RANGE + error to detect when they are done. + """ + ), + ( + "uninmplemented", + "The operation is not implemented or is not supported/enabled in this " + "service." + ), + ( + "internal", + """Internal errors. + + This means that some invariants expected by the underlying system have + been broken. This error code is reserved for serious errors. + """ + ), + ( + "unavailable", + """The service is currently unavailable. + + This is most likely a transient condition, which can be corrected by + retrying with a backoff. Note that it is not always safe to retry + non-idempotent operations. + """ + ), + ( + "data loss", + """Unrecoverable data loss or corruption.""" + ), + ( + "unauthenticated", + """The request does not have valid authentication credentials for the + operation. + """ + ) +]) + + +StatusCanonicalCode = Enum( + "StatusCanonicalCode", + [ + pair for pair in zip( + [key.upper().replace(" ", "_") for key in _STATUS_META.keys()], + count() + ) + ] +) + + +class Status(object): + """Represents the status of a finished Span.""" @property - def canonical_code(self) -> "StatusCanonicalCode": + def canonical_code(self) -> StatusCanonicalCode: """Represents the canonical status code of a finished Span.""" return self._canonical_code @property - def description(self) -> typing.Optional[str]: + def description(self) -> Optional[str]: """Status description""" return self._description @@ -183,3 +197,39 @@ def description(self) -> typing.Optional[str]: def is_ok(self) -> bool: """Returns false if this represents an error, true otherwise.""" return self._canonical_code is StatusCanonicalCode.OK + + +for index, pair in enumerate(_STATUS_META.items()): + key, value = pair + name = key.title().replace(" ", "_") + + status_name = "{}Status".format(name) + + status = type( + status_name, + (Status,), + { + "_canonical_code": StatusCanonicalCode(index), + # FIXME Is _description necessary if __doc__ will hold the same + # value? + "_description": value, + "__doc__": value + } + ) + + setattr(modules[__name__], status_name, status) + + error_name = "{}Error".format(name) + + setattr( + modules[__name__], + error_name, + type( + error_name, + (Exception,), + { + "__doc__": value, + "status": status + } + ) + ) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py index 9ac9d81bc2c..42db05aadd4 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py @@ -133,8 +133,11 @@ def __init__( links: Sequence[trace_api.Link] = (), kind: trace_api.SpanKind = trace_api.SpanKind.INTERNAL, span_processor: SpanProcessor = SpanProcessor(), + **kwargs ) -> None: + super(Span, self).__init__(**kwargs) + self.name = name self.context = context self.parent = parent @@ -144,7 +147,7 @@ def __init__( self.kind = kind self.span_processor = span_processor - self.status = trace_api.Status() + self.status = trace_api.status.OkStatus() self._lock = threading.Lock() if attributes is None: @@ -173,7 +176,10 @@ def __repr__(self): ) def __str__(self): - return '{}(name="{}", context={}, kind={}, parent={}, start_time={}, end_time={})'.format( + return ( + '{}(name="{}", context={}, kind={}, ' + 'parent={}, start_time={}, end_time={})' + ).format( type(self).__name__, self.name, self.context, @@ -348,6 +354,7 @@ def create_span( kind: trace_api.SpanKind = trace_api.SpanKind.INTERNAL, attributes: Optional[types.Attributes] = None, links: Sequence[trace_api.Link] = (), + **kwargs, ) -> "trace_api.Span": """See `opentelemetry.trace.Tracer.create_span`. @@ -414,6 +421,7 @@ def create_span( span_processor=self._active_span_processor, kind=kind, links=links, + **kwargs, ) return trace_api.DefaultSpan(context=context) diff --git a/opentelemetry-sdk/tests/trace/test_trace.py b/opentelemetry-sdk/tests/trace/test_trace.py index 0d36c003e0f..773b0332c9e 100644 --- a/opentelemetry-sdk/tests/trace/test_trace.py +++ b/opentelemetry-sdk/tests/trace/test_trace.py @@ -387,18 +387,22 @@ def test_start_span(self): self.assertIs( span.status.canonical_code, trace_api.status.StatusCanonicalCode.OK ) - self.assertIs(span.status.description, None) + self.assertEqual( + span.status.description, + "Not an error, returned on success." + ) # status - new_status = trace_api.status.Status( - trace_api.status.StatusCanonicalCode.CANCELLED, "Test description" - ) + new_status = trace_api.status.CancelledStatus() span.set_status(new_status) self.assertIs( span.status.canonical_code, trace_api.status.StatusCanonicalCode.CANCELLED, ) - self.assertIs(span.status.description, "Test description") + self.assertEqual( + span.status.description, + "The operation was cancelled, typically by the caller." + ) def test_span_override_start_and_end_time(self): """Span sending custom start_time and end_time values""" @@ -437,10 +441,7 @@ def test_ended_span(self): root.update_name("xxx") self.assertEqual(root.name, "root") - new_status = trace_api.status.Status( - trace_api.status.StatusCanonicalCode.CANCELLED, - "Test description", - ) + new_status = trace_api.status.CancelledStatus() root.set_status(new_status) # default status self.assertTrue(root.status.is_ok) @@ -448,7 +449,10 @@ def test_ended_span(self): root.status.canonical_code, trace_api.status.StatusCanonicalCode.OK, ) - self.assertIs(root.status.description, None) + self.assertEqual( + root.status.description, + "Not an error, returned on success." + ) def span_event_start_fmt(span_processor_name, span_name): diff --git a/tox.ini b/tox.ini index 8f0d02d1ccb..0299fe08b3e 100644 --- a/tox.ini +++ b/tox.ini @@ -83,7 +83,7 @@ commands_pre = mypyinstalled: pip install file://{toxinidir}/opentelemetry-api/ commands = - test: pytest + test: pytest {posargs} coverage: {toxinidir}/scripts/coverage.sh mypy: mypy --namespace-packages opentelemetry-api/src/opentelemetry/