From ed0cc11b0a724e4115d5653b4b7bd4f555912118 Mon Sep 17 00:00:00 2001
From: Lubos Mjachky
Date: Wed, 2 Oct 2024 16:51:41 +0200
Subject: [PATCH] Include the worker's name in the http.server.duration metric

closes #5844
---
 CHANGES/5844.feature         |  1 +
 pulpcore/app/entrypoint.py   |  4 ++--
 pulpcore/app/util.py         |  6 ++++++
 pulpcore/app/wsgi.py         | 30 ++++++++++++++++++++++++++++--
 pulpcore/content/__init__.py |  3 ++-
 pulpcore/content/handler.py  | 10 ++--------
 6 files changed, 41 insertions(+), 13 deletions(-)
 create mode 100644 CHANGES/5844.feature

diff --git a/CHANGES/5844.feature b/CHANGES/5844.feature
new file mode 100644
index 0000000000..0585747f54
--- /dev/null
+++ b/CHANGES/5844.feature
@@ -0,0 +1 @@
+Included the worker's name in the ``http.server.duration`` OpenTelemetry metric attributes.
diff --git a/pulpcore/app/entrypoint.py b/pulpcore/app/entrypoint.py
index 8a7b94b5da..f8647e9798 100644
--- a/pulpcore/app/entrypoint.py
+++ b/pulpcore/app/entrypoint.py
@@ -1,7 +1,6 @@
 from contextvars import ContextVar
 from logging import getLogger
 import os
-import socket
 
 import click
 import django
@@ -11,6 +10,7 @@
 from gunicorn.workers.sync import SyncWorker
 
 from pulpcore.app.apps import pulp_plugin_configs
+from pulpcore.app.util import get_worker_name
 from pulpcore.app.pulpcore_gunicorn_application import PulpcoreGunicornApplication
 
 logger = getLogger(__name__)
@@ -58,7 +58,7 @@ def init_process(self):
         self.ApiAppStatus = ApiAppStatus
         self.api_app_status = None
 
-        self.name = "{pid}@{hostname}".format(pid=self.pid, hostname=socket.gethostname())
+        self.name = get_worker_name()
         self.versions = {app.label: app.version for app in pulp_plugin_configs()}
         self.beat_msg = (
             "Api App '{name}' heartbeat written, sleeping for '{interarrival}' seconds".format(
diff --git a/pulpcore/app/util.py b/pulpcore/app/util.py
index dacaedce90..a999068421 100644
--- a/pulpcore/app/util.py
+++ b/pulpcore/app/util.py
@@ -1,6 +1,7 @@
 import hashlib
 import zlib
 import os
+import socket
 import tempfile
 import gnupg
 
@@ -658,6 +659,11 @@ def init_domain_metrics_exporter():
         DomainMetricsEmitter.build(domain)
 
 
+@lru_cache(maxsize=1)
+def get_worker_name():
+    return f"{os.getpid()}@{socket.gethostname()}"
+
+
 class PGAdvisoryLock:
     """
     A context manager that will hold a postgres advisory lock non-blocking.
diff --git a/pulpcore/app/wsgi.py b/pulpcore/app/wsgi.py
index 5287224e8a..542abb7100 100644
--- a/pulpcore/app/wsgi.py
+++ b/pulpcore/app/wsgi.py
@@ -6,17 +6,43 @@
 For more information on this file, see
 https://docs.djangoproject.com/en/3.2/howto/deployment/wsgi/
 """
-
 from django.core.wsgi import get_wsgi_application
 from opentelemetry.instrumentation.wsgi import OpenTelemetryMiddleware
+from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
+    OTLPMetricExporter,
+)
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
 
 from pulpcore.app.entrypoint import using_pulp_api_worker
+from pulpcore.app.util import get_worker_name
 
 if not using_pulp_api_worker.get(False):
     raise RuntimeError("This app must be executed using pulpcore-api entrypoint.")
 
+
+class WorkerNameMetricsExporter(OTLPMetricExporter):
+    """Export metrics, tagging ``http.server.duration`` points with the worker's name."""
+
+    def export(self, metrics_data, timeout_millis=10_000, **kwargs):
+        for resource_metric in metrics_data.resource_metrics:
+            for scope_metric in resource_metric.scope_metrics:
+                for metric in scope_metric.metrics:
+                    if metric.name == "http.server.duration":
+                        # One data point exists per attribute combination, so tag them
+                        # all instead of only the first; an empty list is a no-op.
+                        for data_point in metric.data.data_points:
+                            data_point.attributes["worker.process"] = get_worker_name()
+
+        return super().export(metrics_data, timeout_millis, **kwargs)
+
+
+exporter = WorkerNameMetricsExporter()
+reader = PeriodicExportingMetricReader(exporter)
+provider = MeterProvider(metric_readers=[reader])
+
 application = get_wsgi_application()
-application = OpenTelemetryMiddleware(application)
+application = OpenTelemetryMiddleware(application, meter_provider=provider)
 
 # Disabling Storage metrics until we find a solution to resource usage.
 # https://github.com/pulp/pulpcore/issues/5468
diff --git a/pulpcore/content/__init__.py b/pulpcore/content/__init__.py
index 3768c3ee3b..125d94dff8 100644
--- a/pulpcore/content/__init__.py
+++ b/pulpcore/content/__init__.py
@@ -24,6 +24,7 @@
 
 from pulpcore.app.apps import pulp_plugin_configs  # noqa: E402: module level not at top of file
 from pulpcore.app.models import ContentAppStatus  # noqa: E402: module level not at top of file
+from pulpcore.app.util import get_worker_name  # noqa: E402: module level not at top of file
 
 from .handler import Handler  # noqa: E402: module level not at top of file
 from .authentication import authenticate  # noqa: E402: module level not at top of file
@@ -38,7 +39,7 @@
 
 async def _heartbeat():
     content_app_status = None
-    name = "{pid}@{hostname}".format(pid=os.getpid(), hostname=socket.gethostname())
+    name = get_worker_name()
     heartbeat_interval = settings.CONTENT_APP_TTL // 4
     msg = "Content App '{name}' heartbeat written, sleeping for '{interarrival}' seconds".format(
         name=name, interarrival=heartbeat_interval
diff --git a/pulpcore/content/handler.py b/pulpcore/content/handler.py
index bc0c0181b8..7be860b5df 100644
--- a/pulpcore/content/handler.py
+++ b/pulpcore/content/handler.py
@@ -3,9 +3,7 @@
 from multidict import CIMultiDict
 import os
 import re
-import socket
 from gettext import gettext as _
-from functools import lru_cache
 
 from aiohttp.client_exceptions import ClientResponseError
 from aiohttp.web import FileResponse, StreamResponse, HTTPOk
@@ -56,6 +54,7 @@
 from pulpcore.app.util import (  # noqa: E402: module level not at top of file
     MetricsEmitter,
     get_domain,
+    get_worker_name,
     cache_key,
 )
 
@@ -67,11 +66,6 @@
 log = logging.getLogger(__name__)
 
 
-@lru_cache(maxsize=1)
-def _get_content_app_name():
-    return f"{os.getpid()}@{socket.gethostname()}"
-
-
 class PathNotResolved(HTTPNotFound):
     """
     The path could not be resolved to a published file.
@@ -1167,6 +1161,6 @@ async def finalize():
     def _report_served_artifact_size(self, size):
         attributes = {
             "domain_name": get_domain().name,
-            "content_app_name": _get_content_app_name(),
+            "worker_name": get_worker_name(),
         }
         self.artifacts_size_counter.add(size, attributes)