diff --git a/pgxn/neon_test_utils/Makefile b/pgxn/neon_test_utils/Makefile
index 13712724399d..252810b5b02e 100644
--- a/pgxn/neon_test_utils/Makefile
+++ b/pgxn/neon_test_utils/Makefile
@@ -7,7 +7,7 @@ OBJS = \
 	neontest.o
 
 EXTENSION = neon_test_utils
-DATA = neon_test_utils--1.2.sql
+DATA = neon_test_utils--1.3.sql
 PGFILEDESC = "neon_test_utils - helpers for neon testing and debugging"
 
 PG_CONFIG = pg_config
diff --git a/pgxn/neon_test_utils/neon_test_utils--1.2.sql b/pgxn/neon_test_utils/neon_test_utils--1.3.sql
similarity index 77%
rename from pgxn/neon_test_utils/neon_test_utils--1.2.sql
rename to pgxn/neon_test_utils/neon_test_utils--1.3.sql
index f84a24ec8d48..3b8794a8cff4 100644
--- a/pgxn/neon_test_utils/neon_test_utils--1.2.sql
+++ b/pgxn/neon_test_utils/neon_test_utils--1.3.sql
@@ -45,3 +45,21 @@ CREATE FUNCTION neon_xlogflush(lsn pg_lsn DEFAULT NULL)
 RETURNS VOID
 AS 'MODULE_PATHNAME', 'neon_xlogflush'
 LANGUAGE C PARALLEL UNSAFE;
+
+CREATE FUNCTION trigger_panic()
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'trigger_panic'
+LANGUAGE C PARALLEL UNSAFE;
+
+CREATE FUNCTION trigger_segfault()
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'trigger_segfault'
+LANGUAGE C PARALLEL UNSAFE;
+
+-- Alias for `trigger_segfault`, just because `SELECT 💣()` looks fun
+CREATE FUNCTION 💣() RETURNS void
+LANGUAGE plpgsql AS $$
+BEGIN
+    PERFORM trigger_segfault();
+END;
+$$;
diff --git a/pgxn/neon_test_utils/neon_test_utils.control b/pgxn/neon_test_utils/neon_test_utils.control
index c7b9191ddc12..f22afd70c4fa 100644
--- a/pgxn/neon_test_utils/neon_test_utils.control
+++ b/pgxn/neon_test_utils/neon_test_utils.control
@@ -1,6 +1,6 @@
 # neon_test_utils extension
 comment = 'helpers for neon testing and debugging'
-default_version = '1.2'
+default_version = '1.3'
 module_pathname = '$libdir/neon_test_utils'
 relocatable = true
 trusted = true
diff --git a/pgxn/neon_test_utils/neontest.c b/pgxn/neon_test_utils/neontest.c
index 071dc122edbd..650ef7405d64 100644
--- a/pgxn/neon_test_utils/neontest.c
+++ b/pgxn/neon_test_utils/neontest.c
@@ -42,6 +42,8 @@ PG_FUNCTION_INFO_V1(clear_buffer_cache);
 PG_FUNCTION_INFO_V1(get_raw_page_at_lsn);
 PG_FUNCTION_INFO_V1(get_raw_page_at_lsn_ex);
 PG_FUNCTION_INFO_V1(neon_xlogflush);
+PG_FUNCTION_INFO_V1(trigger_panic);
+PG_FUNCTION_INFO_V1(trigger_segfault);
 
 /*
  * Linkage to functions in neon module.
@@ -489,3 +491,30 @@ neon_xlogflush(PG_FUNCTION_ARGS)
 	XLogFlush(lsn);
 	PG_RETURN_VOID();
 }
+
+/*
+ * Deliberately crash the calling backend by raising elog(PANIC).
+ *
+ * Test-only helper. Control never returns from elog(PANIC); the trailing
+ * PG_RETURN_VOID() is unreachable and only keeps the function well-formed.
+ */
+Datum
+trigger_panic(PG_FUNCTION_ARGS)
+{
+	elog(PANIC, "neon_test_utils: panic");
+	PG_RETURN_VOID();
+}
+
+/*
+ * Deliberately crash the calling backend by writing through a NULL pointer.
+ *
+ * The pointer is volatile so that the compiler must emit the invalid store;
+ * a plain NULL dereference is undefined behavior and may be optimized away.
+ */
+Datum
+trigger_segfault(PG_FUNCTION_ARGS)
+{
+	volatile int *ptr = NULL;
+	*ptr = 42;
+	PG_RETURN_VOID();
+}
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index c002e11c1c08..5fb4d948175f 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -943,6 +943,8 @@ def __exit__(
                 # if the test threw an exception, don't check for errors
                 # as a failing assertion would cause the cleanup below to fail
                 ps_assert_metric_no_errors=(exc_type is None),
+                # do not fail on endpoint errors to allow the rest of cleanup to proceed
+                fail_on_endpoint_errors=False,
             )
 
             cleanup_error = None
@@ -1214,11 +1216,11 @@ def start(self, timeout_in_seconds: Optional[int] = None):
         for f in futs:
             f.result()
 
-    def stop(self, immediate=False, ps_assert_metric_no_errors=False):
+    def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
         """
         After this method returns, there should be no child processes running.
         """
-        self.endpoints.stop_all()
+        self.endpoints.stop_all(fail_on_error=fail_on_endpoint_errors)
 
         # Stop storage controller before pageservers: we don't want it to spuriously
         # detect a pageserver "failure" during test teardown
@@ -3899,9 +3901,23 @@ def create(
             pageserver_id=pageserver_id,
         )
 
-    def stop_all(self) -> "EndpointFactory":
+    def stop_all(self, fail_on_error: bool = True) -> "EndpointFactory":
+        """
+        Stop every endpoint, continuing past individual failures.
+
+        Each failure is logged. If fail_on_error is true, the last exception
+        seen is re-raised once all endpoints have been attempted.
+        """
+        exception = None
         for ep in self.endpoints:
-            ep.stop()
+            try:
+                ep.stop()
+            except Exception as e:
+                log.error(f"Failed to stop endpoint {ep.endpoint_id}: {e}")
+                exception = e
+
+        if fail_on_error and exception is not None:
+            raise exception
 
         return self
 
diff --git a/test_runner/regress/test_endpoint_crash.py b/test_runner/regress/test_endpoint_crash.py
new file mode 100644
index 000000000000..ae3dded437a0
--- /dev/null
+++ b/test_runner/regress/test_endpoint_crash.py
@@ -0,0 +1,23 @@
+import pytest
+from fixtures.neon_fixtures import NeonEnvBuilder
+
+
+@pytest.mark.parametrize(
+    "sql_func",
+    [
+        "trigger_panic",
+        "trigger_segfault",
+        "💣",  # calls `trigger_segfault` internally
+    ],
+)
+def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
+    """
+    Test that triggering crash from neon_test_utils crashes the endpoint
+    """
+    env = neon_env_builder.init_start()
+    env.neon_cli.create_branch("test_endpoint_crash")
+    endpoint = env.endpoints.create_start("test_endpoint_crash")
+
+    endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
+    with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
+        endpoint.safe_psql(f"SELECT {sql_func}();")