From 11b03c06a7c60eaba642799f4b0672848b19e6ee Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jul 2024 16:10:07 +0100 Subject: [PATCH 1/4] Fix `failures` key in `/keys/query` response --- synapse/handlers/e2e_keys.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 668cec513b..fc7372a323 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -291,14 +291,21 @@ async def _query(destination: str) -> None: # Only try and fetch keys for destinations that are not marked as # down. - filtered_destinations = await filter_destinations_by_retry_limiter( - remote_queries_not_in_cache.keys(), - self.clock, - self.store, - # Let's give an arbitrary grace period for those hosts that are - # only recently down - retry_due_within_ms=60 * 1000, + unfiltered_destinations = remote_queries_not_in_cache.keys() + filtered_destinations = set( + await filter_destinations_by_retry_limiter( + unfiltered_destinations, + self.clock, + self.store, + # Let's give an arbitrary grace period for those hosts that are + # only recently down + retry_due_within_ms=60 * 1000, + ) ) + failures |= { + dest: _NOT_READY_FOR_RETRY_FAILURE + for dest in (unfiltered_destinations - filtered_destinations) + } await concurrently_execute( _query, @@ -1641,6 +1648,9 @@ def _check_device_signature( raise SynapseError(400, "Invalid signature", Codes.INVALID_SIGNATURE) +_NOT_READY_FOR_RETRY_FAILURE = {"status": 503, "message": "Not ready for retry"} + + def _exception_to_failure(e: Exception) -> JsonDict: if isinstance(e, SynapseError): return {"status": e.code, "errcode": e.errcode, "message": str(e)} @@ -1649,7 +1659,7 @@ def _exception_to_failure(e: Exception) -> JsonDict: return {"status": e.code, "message": str(e)} if isinstance(e, NotRetryingDestination): - return {"status": 503, "message": "Not ready for retry"} + return _NOT_READY_FOR_RETRY_FAILURE # include ConnectionRefused and other errors # From 1602b3e4cece804d171346479a33ef36d769247f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jul 2024 17:16:24 +0100 Subject: [PATCH 2/4] Add test --- tests/handlers/test_e2e_keys.py | 59 +++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 0e6352ff4b..8a3dfdcf75 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -43,9 +43,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: self.appservice_api = mock.AsyncMock() - return self.setup_test_homeserver( - federation_client=mock.Mock(), application_service_api=self.appservice_api - ) + return self.setup_test_homeserver(application_service_api=self.appservice_api) def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.handler = hs.get_e2e_keys_handler() @@ -1224,6 +1222,61 @@ def test_query_devices_remote_sync(self) -> None: }, ) + def test_query_devices_remote_down(self) -> None: + """Tests that querying keys for a remote user on an unreachable server returns + results in the "failures" property + """ + + remote_user_id = "@test:other" + local_user_id = "@test:test" + + # The backoff code treats time zero as special + self.reactor.advance(5) + + self.hs.get_federation_http_client().agent.request = mock.AsyncMock( # type: ignore[method-assign] + side_effect=Exception("boop") + ) + + e2e_handler = self.hs.get_e2e_keys_handler() + + query_result = self.get_success( + e2e_handler.query_devices( + { + "device_keys": {remote_user_id: []}, + }, + timeout=10, + from_user_id=local_user_id, + from_device_id="some_device_id", + ) + ) + + self.assertEqual( + query_result["failures"], + { + "other": { + "message": "Failed to send request: Exception: boop", + "status": 503, + } + }, + ) + + # Do it again: we should hit the backoff + query_result = self.get_success( + e2e_handler.query_devices( + { + "device_keys": {remote_user_id: []}, + }, + timeout=10, + from_user_id=local_user_id, + from_device_id="some_device_id", + ) + ) + + self.assertEqual( + query_result["failures"], + {"other": {"message": "Not ready for retry", "status": 503}}, + ) + @parameterized.expand( [ # The remote homeserver's response indicates that this user has 0/1/2 devices. From f6ef1adbcaeeacedee7b71ce74c992c458a3ff46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jul 2024 17:17:40 +0100 Subject: [PATCH 3/4] changelog --- changelog.d/17499.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/17499.bugfix diff --git a/changelog.d/17499.bugfix b/changelog.d/17499.bugfix new file mode 100644 index 0000000000..5cb7b3c30e --- /dev/null +++ b/changelog.d/17499.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.110.0 which caused `/keys/query` to return incomplete results, leading to high network activity and CPU usage on Matrix clients. From b1b4ebd094ee9271401561d8e99ff2b2f25ada81 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jul 2024 17:25:10 +0100 Subject: [PATCH 4/4] Fix for Python 3.9 --- synapse/handlers/e2e_keys.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index fc7372a323..f78e66ad0a 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -302,10 +302,10 @@ async def _query(destination: str) -> None: retry_due_within_ms=60 * 1000, ) ) - failures |= { - dest: _NOT_READY_FOR_RETRY_FAILURE + failures.update( + (dest, _NOT_READY_FOR_RETRY_FAILURE) for dest in (unfiltered_destinations - filtered_destinations) - } + ) await concurrently_execute( _query,