diff --git a/src/sentry/runner/commands/backup.py b/src/sentry/runner/commands/backup.py
index 05c38ed466c4c5..087a8b9b360a1e 100644
--- a/src/sentry/runner/commands/backup.py
+++ b/src/sentry/runner/commands/backup.py
@@ -1,6 +1,8 @@
 from __future__ import annotations

+from difflib import unified_diff
 from io import StringIO
+from typing import NamedTuple, NewType

 import click
 from django.apps import apps
@@ -8,8 +10,96 @@
 from django.db import IntegrityError, connection, transaction

 from sentry.runner.decorators import configuration
+from sentry.utils.json import JSONData, JSONEncoder, better_default_encoder

 EXCLUDED_APPS = frozenset(("auth", "contenttypes"))
+JSON_PRETTY_PRINTER = JSONEncoder(
+    default=better_default_encoder, indent=2, ignore_nan=True, sort_keys=True
+)
+
+ComparatorName = NewType("ComparatorName", str)
+ModelName = NewType("ModelName", str)
+
+
+# TODO(team-ospo/#155): Figure out if we are going to use `pk` as part of the identifier, or some other kind of sequence number internal to the JSON export instead.
+class InstanceID(NamedTuple):
+    """Every entry in the generated backup JSON file should have a unique model+pk combination, which serves as its identifier."""
+
+    model: ModelName
+    pk: int
+
+    def pretty(self) -> str:
+        return f"InstanceID(model: {self.model!r}, pk: {self.pk})"
+
+
+class ComparatorFinding(NamedTuple):
+    """Store all information about a single failed matching between expected and actual output."""
+
+    name: ComparatorName
+    on: InstanceID
+    reason: str = ""
+
+    def pretty(self) -> str:
+        return f"Finding(\n\tname: {self.name!r},\n\ton: {self.on.pretty()},\n\treason: {self.reason}\n)"
+
+
+class ComparatorFindings:
+    """A wrapper type for a list of 'ComparatorFinding' which enables pretty-printing in asserts."""
+
+    def __init__(self, findings: list[ComparatorFinding]):
+        self.findings = findings
+
+    def append(self, finding: ComparatorFinding) -> None:
+        self.findings.append(finding)
+
+    def pretty(self) -> str:
+        return "\n".join(f.pretty() for f in self.findings)
+
+
+def validate(expect: JSONData, actual: JSONData) -> ComparatorFindings:
+    """Ensures that originally imported data correctly matches actual outputted data, and produces a list of reasons why not when it doesn't"""
+
+    def json_lines(obj: JSONData) -> list[str]:
+        """Take a JSONData object and pretty-print it as JSON."""
+
+        return JSON_PRETTY_PRINTER.encode(obj).splitlines()
+
+    findings = ComparatorFindings([])
+    exp_models = {}
+    act_models = {}
+    for model in expect:
+        id = InstanceID(model["model"], model["pk"])
+        exp_models[id] = model
+
+    # Ensure that the actual JSON contains no duplicates - we assume that the expected JSON did not.
+    for model in actual:
+        id = InstanceID(model["model"], model["pk"])
+        if id in act_models:
+            findings.append(ComparatorFinding("duplicate_entry", id))
+        else:
+            act_models[id] = model
+
+    # Report unexpected and missing entries in the actual JSON.
+    extra = sorted(act_models.keys() - exp_models.keys())
+    missing = sorted(exp_models.keys() - act_models.keys())
+    for id in extra:
+        del act_models[id]
+        findings.append(ComparatorFinding("unexpected_entry", id))
+    for id in missing:
+        del exp_models[id]
+        findings.append(ComparatorFinding("missing_entry", id))
+
+    # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
+    # outputs.
+    for id, act in act_models.items():
+        exp = exp_models[id]
+
+        # Finally, perform a diff on the remaining JSON.
+        diff = list(unified_diff(json_lines(exp["fields"]), json_lines(act["fields"]), n=3))
+        if diff:
+            findings.append(ComparatorFinding("json_diff", id, "\n " + "\n ".join(diff)))
+
+    return findings


 @click.command(name="import")
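To make the matching strategy in the new `validate()` concrete: entries are keyed by `(model, pk)` (the `InstanceID`), duplicates, unexpected entries, and missing entries are reported first, and only the surviving pairs get a `unified_diff` of their pretty-printed `fields`. A minimal, stdlib-only sketch of the same idea (the sample entries below are made up; this is an illustration, not the Sentry implementation):

```python
import json
from difflib import unified_diff

expect = [
    {"model": "sentry.option", "pk": 1, "fields": {"key": "sentry:latest_version", "value": "23.6.1"}},
    {"model": "sentry.option", "pk": 2, "fields": {"key": "sentry:version-configured", "value": "23.6.1"}},
]
actual = [
    {"model": "sentry.option", "pk": 1, "fields": {"key": "sentry:latest_version", "value": "23.6.2"}},
]

# Key both sides by (model, pk), mirroring InstanceID.
exp = {(m["model"], m["pk"]): m for m in expect}
act = {(m["model"], m["pk"]): m for m in actual}

print("missing:", sorted(exp.keys() - act.keys()))     # -> [('sentry.option', 2)]
print("unexpected:", sorted(act.keys() - exp.keys()))  # -> []

# Diff the pretty-printed "fields" of entries present on both sides.
for key in sorted(exp.keys() & act.keys()):
    lhs = json.dumps(exp[key]["fields"], indent=2, sort_keys=True).splitlines()
    rhs = json.dumps(act[key]["fields"], indent=2, sort_keys=True).splitlines()
    delta = list(unified_diff(lhs, rhs, n=3))
    if delta:
        print(f"json_diff on {key}:")
        print("\n".join(delta))
```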
diff --git a/src/sentry/utils/pytest/fixtures.py b/src/sentry/utils/pytest/fixtures.py
index ba8f4cfc38dee1..5a7e07fecbc964 100644
--- a/src/sentry/utils/pytest/fixtures.py
+++ b/src/sentry/utils/pytest/fixtures.py
@@ -139,14 +139,18 @@
 }


-def django_db_all(func=None, *, transaction=None, **kwargs):
+def django_db_all(func=None, *, transaction=None, reset_sequences=None, **kwargs):
     """Pytest decorator for resetting all databases"""

     if func is not None:
-        return pytest.mark.django_db(transaction=transaction, databases="__all__")(func)
+        return pytest.mark.django_db(
+            transaction=transaction, reset_sequences=reset_sequences, databases="__all__"
+        )(func)

     def decorator(function):
-        return pytest.mark.django_db(transaction=transaction, databases="__all__")(function)
+        return pytest.mark.django_db(
+            transaction=transaction, reset_sequences=reset_sequences, databases="__all__"
+        )(function)

     return decorator

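The `django_db_all` change simply threads the new keyword through to `pytest.mark.django_db`, so tests can request sequence resets without dropping down to the raw mark. A short sketch of the equivalence (the test names here are invented for illustration):

```python
import pytest

from sentry.utils.pytest.fixtures import django_db_all


# With the change above, the wrapper...
@django_db_all(transaction=True, reset_sequences=True)
def test_with_wrapper(tmp_path):
    ...


# ...expands to the same mark one would otherwise write by hand:
@pytest.mark.django_db(transaction=True, reset_sequences=True, databases="__all__")
def test_with_explicit_mark(tmp_path):
    ...
```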
diff --git a/tests/sentry/backup/test_correctness.py b/tests/sentry/backup/test_correctness.py
index f6c17ad0a9589a..7f85bdb2688d97 100644
--- a/tests/sentry/backup/test_correctness.py
+++ b/tests/sentry/backup/test_correctness.py
@@ -1,111 +1,25 @@
-from __future__ import annotations
-
-from difflib import unified_diff
 from pathlib import Path
-from typing import NamedTuple

 import pytest
 from click.testing import CliRunner
 from freezegun import freeze_time

 from sentry.db.postgres.roles import in_test_psql_role_override
-from sentry.runner.commands.backup import export, import_
+from sentry.runner.commands.backup import ComparatorFindings, export, import_, validate
 from sentry.testutils.factories import get_fixture_path
 from sentry.utils import json
-from sentry.utils.json import JSONData, JSONEncoder, better_default_encoder
-
-
-# TODO(team-ospo/#155): Figure out if we are going to use `pk` as part of the identifier, or some other kind of sequence number internal to the JSON export instead.
-class InstanceID(NamedTuple):
-    """Every entry in the generated backup JSON file should have a unique model+pk combination, which serves as its identifier."""
-
-    model: str
-    pk: int
-
-    def pretty(self) -> str:
-        return f"InstanceID(model: {self.model!r}, pk: {self.pk})"
-
-
-class ComparatorFinding(NamedTuple):
-    """Store all information about a single failed matching between expected and actual output."""
-
-    name: str
-    on: InstanceID
-    reason: str = ""
-
-    def pretty(self) -> str:
-        return f"Finding(\n\tname: {self.name!r},\n\ton: {self.on.pretty()},\n\treason: {self.reason}\n)"
-
-
-class ComparatorFindings:
-    """A wrapper type for a list of 'ComparatorFinding' which enables pretty-printing in asserts."""
-
-    def __init__(self, findings: list[ComparatorFinding]):
-        self.findings = findings
-
-    def append(self, finding: ComparatorFinding) -> None:
-        self.findings.append(finding)
-
-    def pretty(self) -> str:
-        return "\n".join(f.pretty() for f in self.findings)
+from sentry.utils.pytest.fixtures import django_db_all


-JSON_PRETTY_PRINTER = JSONEncoder(
-    default=better_default_encoder, indent=2, ignore_nan=True, sort_keys=True
-)
+class ValidationError(Exception):
+    def __init__(self, info: ComparatorFindings):
+        super().__init__(info.pretty())
+        self.info = info


-def json_lines(obj: JSONData) -> list[str]:
-    """Take a JSONData object and pretty-print it as JSON."""
-
-    return JSON_PRETTY_PRINTER.encode(obj).splitlines()
-
-
-# TODO(team-ospo/#155): Move this out of the test suite, and into its own standalone module, since eventually it will be used to compare live JSON as well.
-def validate(expect: JSONData, actual: JSONData) -> ComparatorFindings:
-    """Ensures that originally imported data correctly matches actual outputted data, and produces a list of reasons why not when it doesn't"""
-
-    findings = ComparatorFindings([])
-    exp_models = {}
-    act_models = {}
-    for model in expect:
-        id = InstanceID(model["model"], model["pk"])
-        exp_models[id] = model
-
-    # Ensure that the actual JSON contains no duplicates - we assume that the expected JSON did not.
-    for model in actual:
-        id = InstanceID(model["model"], model["pk"])
-        if id in act_models:
-            findings.append(ComparatorFinding("duplicate_entry", id))
-        else:
-            act_models[id] = model
-
-    # Report unexpected and missing entries in the actual JSON.
-    extra = sorted(act_models.keys() - exp_models.keys())
-    missing = sorted(exp_models.keys() - act_models.keys())
-    for id in extra:
-        del act_models[id]
-        findings.append(ComparatorFinding("unexpected_entry", id))
-    for id in missing:
-        del exp_models[id]
-        findings.append(ComparatorFinding("missing_entry", id))
-
-    # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
-    # outputs.
-    for id, act in act_models.items():
-        exp = exp_models[id]
-
-        # Finally, perform a diff on the remaining JSON.
-        diff = list(unified_diff(json_lines(exp["fields"]), json_lines(act["fields"]), n=3))
-        if diff:
-            findings.append(ComparatorFinding("json_diff", id, "\n " + "\n ".join(diff)))
-
-    return findings
-
-
-def import_then_export(tmp_path: Path, fixture_file_name: str) -> None:
+def import_export_then_validate(tmp_path: Path, fixture_file_name: str) -> None:
     """Test helper that validates that the originally imported data correctly matches actual
-    outputted data, and produces a list of reasons why not when it doesn't"""
+    outputted export data."""

     fixture_file_path = get_fixture_path("backup", fixture_file_name)
     with open(fixture_file_path) as backup_file:
@@ -126,10 +40,17 @@ def import_then_export(tmp_path: Path, fixture_file_name: str) -> None:

     res = validate(input, output)
     if res.findings:
-        raise AssertionError(res.pretty())
+        raise ValidationError(res)


-@pytest.mark.django_db(transaction=True, reset_sequences=True, databases="__all__")
+@django_db_all(transaction=True, reset_sequences=True)
 @freeze_time("2023-06-22T23:00:00.123Z")
-def test_fresh_install(tmp_path):
-    import_then_export(tmp_path, "fresh-install.json")
+def test_good_fresh_install_validation(tmp_path):
+    import_export_then_validate(tmp_path, "fresh-install.json")
+
+
+@django_db_all(transaction=True, reset_sequences=True)
+def test_bad_fresh_install_validation(tmp_path):
+    with pytest.raises(ValidationError) as excinfo:
+        import_export_then_validate(tmp_path, "fresh-install.json")
+    assert len(excinfo.value.info.findings) == 2
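The split between `test_good_fresh_install_validation` and `test_bad_fresh_install_validation` hinges on `ValidationError` carrying the structured findings; presumably the second test produces findings because it omits `@freeze_time`, so timestamps drift between import and export, and the assertion pins that count at 2. A hedged, self-contained sketch of the assertion flow (it assumes the Sentry test environment so `sentry.runner.commands.backup` is importable; the local `ValidationError`, helper, and sample data are illustrative, not part of this change):

```python
import pytest

from sentry.runner.commands.backup import ComparatorFindings, validate


class ValidationError(Exception):
    """Mirrors the exception added in test_correctness.py: wraps findings so tests can inspect them."""

    def __init__(self, info: ComparatorFindings):
        super().__init__(info.pretty())
        self.info = info


def check(expect, actual):
    # Same shape as import_export_then_validate(): raise only when findings exist.
    res = validate(expect, actual)
    if res.findings:
        raise ValidationError(res)


def test_missing_entry_is_reported():
    expect = [{"model": "sentry.option", "pk": 1, "fields": {"key": "k", "value": "v"}}]
    with pytest.raises(ValidationError) as excinfo:
        check(expect, [])
    assert [f.name for f in excinfo.value.info.findings] == ["missing_entry"]
```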