Skip to content

Commit

Permalink
tests: fix metrics check in test_s3_eviction (#8419)
Browse files Browse the repository at this point in the history
## Problem

This test would occasionally fail its metric check. This could happen in
the rare case that the nodes had all been restarted before their most
recent eviction.

The metric check was added in
#8348

## Summary of changes

- Check metrics before each restart and accumulate the result into a bool that we
assert on at the end of the test
  • Loading branch information
jcsp authored and problame committed Jul 22, 2024
1 parent 3d2c2ce commit de9bf2a
Showing 1 changed file with 24 additions and 19 deletions.
43 changes: 24 additions & 19 deletions test_runner/regress/test_wal_acceptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2242,6 +2242,8 @@ def test_s3_eviction(

check_values = [0] * n_timelines

event_metrics_seen = False

n_iters = 20
for _ in range(n_iters):
if log.isEnabledFor(logging.DEBUG):
Expand All @@ -2266,6 +2268,27 @@ def test_s3_eviction(
# update remote_consistent_lsn on pageserver
ps_client.timeline_checkpoint(env.initial_tenant, timelines[i], wait_until_uploaded=True)

# Do the metrics check before restarts, since the metric values reset to zero
# across a restart. The result is OR-ed into event_metrics_seen on every
# iteration so that a single successful observation is remembered.
#
# A safekeeper counts as "seen" only if all four eviction metrics
# (started/completed x evict/restore) are positive. Each lookup is wrapped as
# `(value or 0) > 0`: without the parentheses, `value or 0 > 0 and ...` parses
# as `value or ((0 > 0) and ...)`, which collapses to the truthiness of the
# first metric alone and never evaluates the other three.
# NOTE(review): the `or 0` fallback presumably covers get_metric_value()
# returning None when the metric is absent -- confirm against the client.
event_metrics_seen |= any(
    all(
        (sk.http_client().get_metric_value(metric_name, {"kind": kind}) or 0) > 0
        for metric_name, kind in (
            ("safekeeper_eviction_events_started_total", "evict"),
            ("safekeeper_eviction_events_completed_total", "evict"),
            ("safekeeper_eviction_events_started_total", "restore"),
            ("safekeeper_eviction_events_completed_total", "restore"),
        )
    )
    for sk in env.safekeepers
)

# restarting random safekeepers
for sk in env.safekeepers:
if random.random() < restart_chance:
Expand All @@ -2280,22 +2303,4 @@ def test_s3_eviction(
for sk in env.safekeepers
)

assert any(
sk.http_client().get_metric_value(
"safekeeper_eviction_events_started_total", {"kind": "evict"}
)
or 0 > 0
and sk.http_client().get_metric_value(
"safekeeper_eviction_events_completed_total", {"kind": "evict"}
)
or 0 > 0
and sk.http_client().get_metric_value(
"safekeeper_eviction_events_started_total", {"kind": "restore"}
)
or 0 > 0
and sk.http_client().get_metric_value(
"safekeeper_eviction_events_completed_total", {"kind": "restore"}
)
or 0 > 0
for sk in env.safekeepers
)
assert event_metrics_seen

0 comments on commit de9bf2a

Please sign in to comment.