Skip to content

Commit

Permalink
Fix test_s3_eviction
Browse files Browse the repository at this point in the history
  • Loading branch information
petuhovskiy committed Jun 24, 2024
1 parent d226955 commit 992b565
Showing 1 changed file with 35 additions and 2 deletions.
37 changes: 35 additions & 2 deletions test_runner/regress/test_wal_acceptor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import filecmp
import logging
import os
import random
import shutil
Expand Down Expand Up @@ -2180,6 +2181,14 @@ def do_something():
do_something()


# This test creates 5 endpoints and repeatedly starts a randomly chosen one.
# All timeouts are configured to be very short, so we expect that:
# - the pageserver will update remote_consistent_lsn very often
# - safekeepers will upload partial WAL segments very often
# - safekeepers will try to evict and unevict timelines
#
# The test checks that no critical errors occur while doing this. It also
# checks that every safekeeper has at least one successful eviction.
@pytest.mark.parametrize("delete_offloaded_wal", [False, True])
@pytest.mark.parametrize("restart_chance", [0.0, 0.2])
def test_s3_eviction(
Expand All @@ -2203,32 +2212,56 @@ def test_s3_eviction(
n_timelines = 5

branch_names = [f"branch{tlin}" for tlin in range(n_timelines)]
timelines = []

# start postgres on each timeline
endpoints: list[Endpoint] = []
for branch_name in branch_names:
env.neon_cli.create_branch(branch_name)
timeline_id = env.neon_cli.create_branch(branch_name)
timelines.append(timeline_id)

endpoints.append(env.endpoints.create_start(branch_name))
endpoints[-1].safe_psql("CREATE TABLE t(i int)")
endpoints[-1].safe_psql("INSERT INTO t VALUES (0)")

lsn = endpoints[-1].safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]
log.info(f"{branch_name}: LSN={lsn}")

endpoints[-1].stop()

check_values = [0] * n_timelines
ps_client = env.pageservers[0].http_client()

n_iters = 20
for _ in range(n_iters):
if log.isEnabledFor(logging.DEBUG):
for j in range(n_timelines):
detail = ps_client.timeline_detail(env.initial_tenant, timelines[j])
log.debug(
f'{branch_names[j]}: RCL={detail["remote_consistent_lsn"]}, LRL={detail["last_record_lsn"]}'
)

i = random.randint(0, n_timelines - 1)
log.info(f"Starting endpoint {i}")
endpoints[i].start()
check_values[i] += 1
res = endpoints[i].safe_psql("UPDATE t SET i = i + 1 RETURNING i")
assert res[0][0] == check_values[i]

lsn = endpoints[i].safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]
log.info(f"{branch_names[i]}: LSN={lsn}")

endpoints[i].stop()

# update remote_consistent_lsn on pageserver
ps_client.timeline_checkpoint(env.initial_tenant, timelines[i], wait_until_uploaded=True)

# restarting random safekeepers
for sk in env.safekeepers:
if random.random() < restart_chance:
sk.stop().start(extra_opts=extra_opts)
time.sleep(0.5)

# TODO: check logs for successful eviction
# require at least one successful eviction on each safekeeper
for sk in env.safekeepers:
assert sk.log_contains("successfully evicted timeline")

0 comments on commit 992b565

Please sign in to comment.