Skip to content

Commit

Permalink
Include the worker's name in the http.server.duration metric
Browse files Browse the repository at this point in the history
closes pulp#5844
  • Loading branch information
lubosmj committed Oct 2, 2024
1 parent db53f4b commit ed0cc11
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGES/5844.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Included the worker's name in the ``http.server.duration`` OpenTelemetry metric attributes.
4 changes: 2 additions & 2 deletions pulpcore/app/entrypoint.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from contextvars import ContextVar
from logging import getLogger
import os
import socket

import click
import django
Expand All @@ -11,6 +10,7 @@
from gunicorn.workers.sync import SyncWorker

from pulpcore.app.apps import pulp_plugin_configs
from pulpcore.app.util import get_worker_name
from pulpcore.app.pulpcore_gunicorn_application import PulpcoreGunicornApplication

logger = getLogger(__name__)
Expand Down Expand Up @@ -58,7 +58,7 @@ def init_process(self):
self.ApiAppStatus = ApiAppStatus
self.api_app_status = None

self.name = "{pid}@{hostname}".format(pid=self.pid, hostname=socket.gethostname())
self.name = get_worker_name()
self.versions = {app.label: app.version for app in pulp_plugin_configs()}
self.beat_msg = (
"Api App '{name}' heartbeat written, sleeping for '{interarrival}' seconds".format(
Expand Down
6 changes: 6 additions & 0 deletions pulpcore/app/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import hashlib
import zlib
import os
import socket
import tempfile
import gnupg

Expand Down Expand Up @@ -658,6 +659,11 @@ def init_domain_metrics_exporter():
DomainMetricsEmitter.build(domain)


@lru_cache(maxsize=1)
def get_worker_name():
    """Return the identifier of the current process as ``<pid>@<hostname>``.

    The value is memoized, so the PID and hostname are looked up only on the
    first call within a process.
    """
    pid = os.getpid()
    hostname = socket.gethostname()
    return "{}@{}".format(pid, hostname)


class PGAdvisoryLock:
"""
A context manager that will hold a postgres advisory lock non-blocking.
Expand Down
26 changes: 24 additions & 2 deletions pulpcore/app/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,39 @@
For more information on this file, see
https://docs.djangoproject.com/en/3.2/howto/deployment/wsgi/
"""

from django.core.wsgi import get_wsgi_application
from opentelemetry.instrumentation.wsgi import OpenTelemetryMiddleware
from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
OTLPMetricExporter,
)
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader

from pulpcore.app.entrypoint import using_pulp_api_worker
from pulpcore.app.util import get_worker_name

if not using_pulp_api_worker.get(False):
raise RuntimeError("This app must be executed using pulpcore-api entrypoint.")


class WorkerNameMetricsExporter(OTLPMetricExporter):
    """OTLP metric exporter that adds the worker's name to ``http.server.duration``.

    Before handing the batch to the parent exporter, every data point of the
    ``http.server.duration`` metric gets a ``worker.process`` attribute holding
    the value returned by ``get_worker_name()``.
    """

    def export(self, metrics_data, timeout_millis=10_000, **kwargs):
        """Tag the HTTP duration data points and export the batch.

        Args:
            metrics_data: The batch to export; its ``resource_metrics`` ->
                ``scope_metrics`` -> ``metrics`` hierarchy is walked.
            timeout_millis: Forwarded unchanged to the parent ``export``.
            **kwargs: Forwarded unchanged to the parent ``export``.

        Returns:
            Whatever the parent exporter's ``export`` returns.
        """
        worker_name = get_worker_name()
        for resource_metric in metrics_data.resource_metrics:
            for scope_metric in resource_metric.scope_metrics:
                for metric in scope_metric.metrics:
                    if metric.name != "http.server.duration":
                        continue
                    # Tag every data point, not just the first one: a metric may
                    # carry several data points, and it may also carry none, in
                    # which case indexing [0] would raise IndexError.
                    # NOTE(review): assumes ``attributes`` is a mutable mapping,
                    # as the previous in-place assignment already did.
                    for data_point in metric.data.data_points:
                        data_point.attributes["worker.process"] = worker_name

        return super().export(metrics_data, timeout_millis, **kwargs)


# Build the metrics pipeline: the exporter is handed to a periodic reader, which in
# turn is registered on a dedicated MeterProvider.
exporter = WorkerNameMetricsExporter()
reader = PeriodicExportingMetricReader(exporter)
provider = MeterProvider(metric_readers=[reader])

# Create the Django WSGI application and wrap it with the OpenTelemetry middleware.
application = get_wsgi_application()
# NOTE(review): the two wrapping lines below look like the removed/added pair of this
# diff hunk; presumably only the `meter_provider=provider` variant should remain,
# otherwise the application is wrapped twice — confirm.
application = OpenTelemetryMiddleware(application)
application = OpenTelemetryMiddleware(application, meter_provider=provider)

# Disabling Storage metrics until we find a solution to resource usage.
# https://github.com/pulp/pulpcore/issues/5468
Expand Down
3 changes: 2 additions & 1 deletion pulpcore/content/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from pulpcore.app.apps import pulp_plugin_configs # noqa: E402: module level not at top of file
from pulpcore.app.models import ContentAppStatus # noqa: E402: module level not at top of file
from pulpcore.app.util import get_worker_name # noqa: E402: module level not at top of file

from .handler import Handler # noqa: E402: module level not at top of file
from .authentication import authenticate # noqa: E402: module level not at top of file
Expand All @@ -38,7 +39,7 @@

async def _heartbeat():
content_app_status = None
name = "{pid}@{hostname}".format(pid=os.getpid(), hostname=socket.gethostname())
name = get_worker_name()
heartbeat_interval = settings.CONTENT_APP_TTL // 4
msg = "Content App '{name}' heartbeat written, sleeping for '{interarrival}' seconds".format(
name=name, interarrival=heartbeat_interval
Expand Down
10 changes: 2 additions & 8 deletions pulpcore/content/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from multidict import CIMultiDict
import os
import re
import socket
from gettext import gettext as _
from functools import lru_cache

from aiohttp.client_exceptions import ClientResponseError
from aiohttp.web import FileResponse, StreamResponse, HTTPOk
Expand Down Expand Up @@ -56,6 +54,7 @@
from pulpcore.app.util import ( # noqa: E402: module level not at top of file
MetricsEmitter,
get_domain,
get_worker_name,
cache_key,
)

Expand All @@ -67,11 +66,6 @@
log = logging.getLogger(__name__)


@lru_cache(maxsize=1)
def _get_content_app_name():
    """Return the content app's process identifier as ``<pid>@<hostname>``.

    The value is memoized after the first call.
    """
    pid = os.getpid()
    host = socket.gethostname()
    return "{pid}@{host}".format(pid=pid, host=host)


class PathNotResolved(HTTPNotFound):
"""
The path could not be resolved to a published file.
Expand Down Expand Up @@ -1167,6 +1161,6 @@ async def finalize():
# Add `size` to the artifacts-size counter, tagged with the current domain's name and
# the name of the serving process.
def _report_served_artifact_size(self, size):
attributes = {
"domain_name": get_domain().name,
# NOTE(review): the next two entries look like the removed/added pair of this diff
# hunk (old key "content_app_name" vs. new key "worker_name") — confirm that only
# one of them is meant to remain.
"content_app_name": _get_content_app_name(),
"worker_name": get_worker_name(),
}
self.artifacts_size_counter.add(size, attributes)

0 comments on commit ed0cc11

Please sign in to comment.