refactor(test): deduplicate fullbackup, add tar content hashing (#7828)
"taking a fullbackup" is an ugly multi-liner copypasted in multiple
places, most recently with timeline ancestor detach tests. move it under
`PgBin` which is not a great place, but better than yet another utility
function.

Additionally:
- clean up the `psql_env` repetition (`PgBin` already configures it)
- move the backup tar comparison out as yet another free utility function
- use the backup tar comparison in `test_import.py`, where previously only a
  size check was done
- remove the extra timeline creation from the test

Cc: #7715
koivunej authored May 22, 2024
1 parent ef96c82 commit df9ab1b
Showing 6 changed files with 106 additions and 145 deletions.
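
Taken together, the two new helpers collapse the copypasted blocks into single calls. Below is a minimal sketch of a post-refactor call site, assuming the usual test_runner fixtures; the LSN literal is made up (a real test queries pg_current_wal_insert_lsn()):

from pathlib import Path

from fixtures.common_types import Lsn
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
from fixtures.utils import assert_pageserver_backups_equal


def test_fullbackup_sketch(
    neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_output_dir: Path
):
    env = neon_env_builder.init_start()

    # A real test would read this back from the endpoint; this value is made up.
    lsn = Lsn("0/169AD58")

    first = test_output_dir / "fullbackup.tar"
    pg_bin.take_fullbackup(env.pageserver, env.initial_tenant, env.initial_timeline, lsn, first)

    second = test_output_dir / "fullbackup-new.tar"
    pg_bin.take_fullbackup(env.pageserver, env.initial_tenant, env.initial_timeline, lsn, second)

    # Content-based comparison instead of the old size check:
    assert_pageserver_backups_equal(first, second, set())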
22 changes: 22 additions & 0 deletions test_runner/fixtures/neon_fixtures.py
@@ -2788,6 +2788,28 @@ def get_pg_controldata_checkpoint_lsn(self, pgdata: str) -> Lsn:
         log.info(f"last checkpoint at {checkpoint_lsn}")
         return Lsn(checkpoint_lsn)
 
+    def take_fullbackup(
+        self,
+        pageserver: NeonPageserver,
+        tenant: TenantId,
+        timeline: TimelineId,
+        lsn: Lsn,
+        output: Path,
+    ):
+        """
+        Request fullbackup from pageserver, store it at 'output'.
+        """
+        cmd = [
+            "psql",
+            "--no-psqlrc",
+            pageserver.connstr(),
+            "-c",
+            f"fullbackup {tenant} {timeline} {lsn}",
+            "-o",
+            str(output),
+        ]
+        self.run_capture(cmd)
+
 
 @pytest.fixture(scope="function")
 def pg_bin(test_output_dir: Path, pg_distrib_dir: Path, pg_version: PgVersion) -> PgBin:
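
The helper relies on psql's `-o` flag: the fullbackup command goes to the pageserver over the libpq protocol as if it were SQL, and the tar stream that comes back is written to the output file. A standalone sketch of the same mechanism, with a hypothetical connection string and IDs:

import subprocess
from pathlib import Path


def take_fullbackup_standalone(connstr: str, tenant: str, timeline: str, lsn: str, output: Path):
    # The pageserver speaks the postgres wire protocol, so plain psql works;
    # -o redirects the response to `output` instead of stdout.
    subprocess.run(
        ["psql", "--no-psqlrc", connstr, "-c", f"fullbackup {tenant} {timeline} {lsn}", "-o", str(output)],
        check=True,
    )


# Hypothetical address and IDs, purely for illustration:
take_fullbackup_standalone(
    "postgres://localhost:64000", "some-tenant-id", "some-timeline-id", "0/169AD58", Path("fullbackup.tar")
)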
50 changes: 50 additions & 0 deletions test_runner/fixtures/utils.py
@@ -4,19 +4,24 @@
 import os
 import re
 import subprocess
+import tarfile
 import threading
 import time
+from hashlib import sha256
 from pathlib import Path
 from typing import (
+    IO,
     TYPE_CHECKING,
     Any,
     Callable,
     Dict,
     Iterable,
     List,
     Optional,
+    Set,
     Tuple,
     TypeVar,
+    Union,
 )
 from urllib.parse import urlencode

@@ -499,3 +504,48 @@ def __repr__(self) -> str:
 
     def __str__(self) -> str:
         return f"'aux-{self.value}'"
+
+
+def assert_pageserver_backups_equal(left: Path, right: Path, skip_files: Set[str]):
+    """
+    This is essentially:
+
+    lines=$(comm -3 \
+      <(mkdir left && cd left && tar xf "$left" && find . -type f -print0 | xargs -0 sha256sum | sort -k2) \
+      <(mkdir right && cd right && tar xf "$right" && find . -type f -print0 | xargs -0 sha256sum | sort -k2) \
+      | wc -l)
+    [ "$lines" = "0" ]
+
+    But in a more mac friendly fashion.
+    """
+    started_at = time.time()
+
+    def hash_extracted(reader: Union[IO[bytes], None]) -> bytes:
+        assert reader is not None
+        digest = sha256(usedforsecurity=False)
+        while True:
+            buf = reader.read(64 * 1024)
+            if not buf:
+                break
+            digest.update(buf)
+        return digest.digest()
+
+    def build_hash_list(p: Path) -> List[Tuple[str, bytes]]:
+        with tarfile.open(p) as f:
+            matching_files = (info for info in f if info.isreg() and info.name not in skip_files)
+            ret = list(
+                map(lambda info: (info.name, hash_extracted(f.extractfile(info))), matching_files)
+            )
+        ret.sort(key=lambda t: t[0])
+        return ret
+
+    left_list, right_list = map(build_hash_list, [left, right])
+
+    try:
+        assert len(left_list) == len(right_list)
+
+        for left_tuple, right_tuple in zip(left_list, right_list):
+            assert left_tuple == right_tuple
+    finally:
+        elapsed = time.time() - started_at
+        log.info(f"assert_pageserver_backups_equal completed in {elapsed}s")
23 changes: 9 additions & 14 deletions test_runner/regress/test_fullbackup.py
@@ -1,7 +1,7 @@
 import os
 from pathlib import Path
 
-from fixtures.common_types import Lsn, TimelineId
+from fixtures.common_types import Lsn
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
     NeonEnvBuilder,
@@ -19,17 +19,16 @@ def test_fullbackup(
     neon_env_builder: NeonEnvBuilder,
     pg_bin: PgBin,
     port_distributor: PortDistributor,
-    pg_distrib_dir: Path,
     test_output_dir: Path,
 ):
     env = neon_env_builder.init_start()
 
-    env.neon_cli.create_branch("test_fullbackup")
-    endpoint_main = env.endpoints.create_start("test_fullbackup")
+    # endpoint needs to be alive until the fullbackup so that we have
+    # prev_record_lsn for the vanilla_pg to start in read-write mode
+    # for some reason this does not happen if endpoint is shutdown.
+    endpoint_main = env.endpoints.create_start("main")
 
     with endpoint_main.cursor() as cur:
-        timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id"))
-
         # data loading may take a while, so increase statement timeout
         cur.execute("SET statement_timeout='300s'")
         cur.execute(
@@ -41,17 +40,13 @@
     lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_insert_lsn()"))
     log.info(f"start_backup_lsn = {lsn}")
 
-    # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
-    # PgBin sets it automatically, but here we need to pipe psql output to the tar command.
-    psql_env = {"LD_LIBRARY_PATH": str(pg_distrib_dir / "lib")}
-
     # Get and unpack fullbackup from pageserver
     restored_dir_path = env.repo_dir / "restored_datadir"
     os.mkdir(restored_dir_path, 0o750)
-    query = f"fullbackup {env.initial_tenant} {timeline} {lsn}"
     tar_output_file = test_output_dir / "fullbackup.tar"
-    cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query, "-o", str(tar_output_file)]
-    pg_bin.run_capture(cmd, env=psql_env)
+    pg_bin.take_fullbackup(
+        env.pageserver, env.initial_tenant, env.initial_timeline, lsn, tar_output_file
+    )
     subprocess_capture(
         env.repo_dir, ["tar", "-xf", str(tar_output_file), "-C", str(restored_dir_path)]
     )
@@ -61,7 +56,7 @@
     # use resetwal to overwrite it
     pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal")
     cmd = [pg_resetwal_path, "-D", str(restored_dir_path)]
-    pg_bin.run_capture(cmd, env=psql_env)
+    pg_bin.run_capture(cmd)
 
     # Restore from the backup and find the data we inserted
     port = port_distributor.get_port()
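
For context, the restore flow this test performs after taking the backup, as a standalone sketch with a hypothetical postgres install path: the fullbackup tar carries no WAL, so pg_resetwal must rewrite the control file before a vanilla postgres can start on the restored datadir.

import subprocess
from pathlib import Path

pg_bin_dir = Path("/usr/local/pgsql/bin")  # hypothetical postgres install
backup = Path("fullbackup.tar")
restored = Path("restored_datadir")

restored.mkdir(mode=0o750)
subprocess.run(["tar", "-xf", str(backup), "-C", str(restored)], check=True)
# No WAL in the backup: reset it so the cluster can start read-write.
subprocess.run([str(pg_bin_dir / "pg_resetwal"), "-D", str(restored)], check=True)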
25 changes: 4 additions & 21 deletions test_runner/regress/test_import.py
@@ -21,7 +21,7 @@
     wait_for_upload,
 )
 from fixtures.remote_storage import RemoteStorageKind
-from fixtures.utils import subprocess_capture
+from fixtures.utils import assert_pageserver_backups_equal, subprocess_capture
 
 
 def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_builder):
@@ -248,15 +248,9 @@ def _import(
     path to the backup archive file"""
     log.info(f"start_backup_lsn = {lsn}")
 
-    # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
-    # PgBin sets it automatically, but here we need to pipe psql output to the tar command.
-    psql_env = {"LD_LIBRARY_PATH": str(pg_distrib_dir / "lib")}
-
     # Get a fullbackup from pageserver
-    query = f"fullbackup { env.initial_tenant} {timeline} {lsn}"
     tar_output_file = test_output_dir / "fullbackup.tar"
-    cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query, "-o", str(tar_output_file)]
-    pg_bin.run_capture(cmd, env=psql_env)
+    pg_bin.take_fullbackup(env.pageserver, env.initial_tenant, timeline, lsn, tar_output_file)
 
     # Stop the first pageserver instance, erase all its data
     env.endpoints.stop_all()
@@ -305,22 +299,11 @@ def _import(
     assert endpoint.safe_psql("select count(*) from tbl") == [(expected_num_rows,)]
 
     # Take another fullbackup
-    query = f"fullbackup { tenant} {timeline} {lsn}"
     new_tar_output_file = test_output_dir / "fullbackup-new.tar"
-    cmd = [
-        "psql",
-        "--no-psqlrc",
-        env.pageserver.connstr(),
-        "-c",
-        query,
-        "-o",
-        str(new_tar_output_file),
-    ]
-    pg_bin.run_capture(cmd, env=psql_env)
+    pg_bin.take_fullbackup(env.pageserver, tenant, timeline, lsn, new_tar_output_file)
 
     # Check it's the same as the first fullbackup
     # TODO pageserver should be checking checksum
-    assert os.path.getsize(tar_output_file) == os.path.getsize(new_tar_output_file)
+    assert_pageserver_backups_equal(tar_output_file, new_tar_output_file, set())
 
     # Check that gc works
     pageserver_http = env.pageserver.http_client()
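
The switch from `os.path.getsize` to `assert_pageserver_backups_equal` matters because two archives can have identical sizes but different bytes; `skip_files` additionally lets a caller exclude entries that are expected to differ. A sketch with made-up file names:

import io
import tarfile
from pathlib import Path

from fixtures.utils import assert_pageserver_backups_equal


def make_tar(path: Path, files: dict):
    with tarfile.open(path, "w") as tf:
        for name, data in files.items():
            info = tarfile.TarInfo(name)
            info.size = len(data)
            tf.addfile(info, io.BytesIO(data))


# Same sizes everywhere, but one file's bytes differ: a size check would pass...
make_tar(Path("a.tar"), {"pg_version": b"16", "volatile": b"aaaa"})
make_tar(Path("b.tar"), {"pg_version": b"16", "volatile": b"bbbb"})

# ...while the content comparison only passes once the differing file is skipped.
assert_pageserver_backups_equal(Path("a.tar"), Path("b.tar"), skip_files={"volatile"})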
11 changes: 3 additions & 8 deletions test_runner/regress/test_next_xid.py
@@ -5,7 +5,7 @@
 
 from fixtures.common_types import Lsn, TenantId, TimelineId
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_wal_insert_lsn
+from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, wait_for_wal_insert_lsn
 from fixtures.pageserver.utils import (
     wait_for_last_record_lsn,
 )
@@ -71,22 +71,17 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
 def test_import_at_2bil(
     neon_env_builder: NeonEnvBuilder,
     test_output_dir: Path,
-    pg_distrib_dir: Path,
-    pg_bin,
+    pg_bin: PgBin,
     vanilla_pg,
 ):
     neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
     env = neon_env_builder.init_start()
     ps_http = env.pageserver.http_client()
 
-    # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
-    # PgBin sets it automatically, but here we need to pipe psql output to the tar command.
-    psql_env = {"LD_LIBRARY_PATH": str(pg_distrib_dir / "lib")}
-
     # Reset the vanilla Postgres instance to somewhat before 2 billion transactions.
     pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal")
     cmd = [pg_resetwal_path, "--next-transaction-id=2129920000", "-D", str(vanilla_pg.pgdatadir)]
-    pg_bin.run_capture(cmd, env=psql_env)
+    pg_bin.run_capture(cmd)
 
     vanilla_pg.start()
     vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
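
Why dropping the hand-rolled `psql_env` is safe: `PgBin.run_capture` already injects the matching `LD_LIBRARY_PATH` into every child process. A rough sketch of that defaulting, as an assumption about the fixture's shape rather than its actual code:

import os
import subprocess
from pathlib import Path
from typing import Dict, List, Optional


class PgBinLike:
    def __init__(self, pg_distrib_dir: Path):
        self.pg_lib_path = pg_distrib_dir / "lib"

    def run_capture(self, cmd: List[str], env: Optional[Dict[str, str]] = None):
        merged = dict(os.environ if env is None else env)
        # Every invocation gets the matching libpq, so callers need not
        # hand-roll {"LD_LIBRARY_PATH": ...} environments anymore.
        merged["LD_LIBRARY_PATH"] = str(self.pg_lib_path)
        subprocess.run(cmd, env=merged, check=True)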

1 comment on commit df9ab1b

@github-actions

3184 tests run: 3043 passed, 1 failed, 140 skipped (full report)


Failures on Postgres 14

  • test_download_churn[github-actions-selfhosted-100-std-fs-30]: release
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_download_churn[release-pg14-github-actions-selfhosted-100-std-fs-30]"

Code coverage* (full report)

  • functions: 31.3% (6414 of 20481 functions)
  • lines: 48.0% (49312 of 102647 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
df9ab1b at 2024-05-22T14:01:31.317Z :recycle:
