From 28fb5d0a25febf271acc419c71d8e9a95dc8ef84 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Aug 2022 17:59:55 -0500 Subject: [PATCH 1/5] Trace time in _check_sigs_and_hash_and_fetch Fix https://github.com/matrix-org/synapse/issues/13587 --- synapse/crypto/event_signing.py | 2 ++ synapse/events/spamcheck.py | 2 ++ synapse/federation/federation_base.py | 22 ++++++++++++++++++++++ synapse/federation/federation_client.py | 16 +++++++++++++++- synapse/util/ratelimitutils.py | 5 +++++ 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 7520647d1e1d..7c5a21ff4c8b 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -28,6 +28,7 @@ from synapse.api.room_versions import RoomVersion from synapse.events import EventBase from synapse.events.utils import prune_event, prune_event_dict +from synapse.logging.tracing import trace from synapse.types import JsonDict logger = logging.getLogger(__name__) @@ -35,6 +36,7 @@ Hasher = Callable[[bytes], "hashlib._Hash"] +@trace def check_event_content_hash( event: EventBase, hash_algorithm: Hasher = hashlib.sha256 ) -> bool: diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 4a3bfb38f1dc..c691ef5973e4 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -32,6 +32,7 @@ import synapse from synapse.api.errors import Codes +from synapse.logging.tracing import trace from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour @@ -378,6 +379,7 @@ def register_callbacks( if check_media_file_for_spam is not None: self._check_media_file_for_spam_callbacks.append(check_media_file_for_spam) + @trace async def check_event_for_spam( self, event: "synapse.events.EventBase" ) -> Union[Tuple[Codes, JsonDict], str]: diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 2522bf78fccb..dcd0a4b425df 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -23,6 +23,7 @@ from synapse.events import EventBase, make_event_from_dict from synapse.events.utils import prune_event, validate_canonicaljson from synapse.http.servlet import assert_params_in_dict +from synapse.logging.tracing import log_kv, trace from synapse.types import JsonDict, get_domain_from_id if TYPE_CHECKING: @@ -55,6 +56,7 @@ def __init__(self, hs: "HomeServer"): self._clock = hs.get_clock() self._storage_controllers = hs.get_storage_controllers() + @trace async def _check_sigs_and_hash( self, room_version: RoomVersion, pdu: EventBase ) -> EventBase: @@ -97,17 +99,36 @@ async def _check_sigs_and_hash( "Event %s seems to have been redacted; using our redacted copy", pdu.event_id, ) + log_kv( + { + "message": "Event seems to have been redacted; using our redacted copy", + "event_id": pdu.event_id, + } + ) else: logger.warning( "Event %s content has been tampered, redacting", pdu.event_id, ) + log_kv( + { + "message": "Event content has been tampered, redacting", + "event_id": pdu.event_id, + } + ) return redacted_event spam_check = await self.spam_checker.check_event_for_spam(pdu) if spam_check != self.spam_checker.NOT_SPAM: logger.warning("Event contains spam, soft-failing %s", pdu.event_id) + log_kv( + { + "message": "Event contains spam, redacting (to save disk space) " + "as well as soft-failing (to stop using the event in prev_events)", + "event_id": pdu.event_id, + } + ) # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). redacted_event = prune_event(pdu) @@ -117,6 +138,7 @@ async def _check_sigs_and_hash( return pdu +@trace async def _check_sigs_on_pdu( keyring: Keyring, room_version: RoomVersion, pdu: EventBase ) -> None: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 987f6dad460c..5e6ac241462d 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -61,7 +61,7 @@ ) from synapse.federation.transport.client import SendJoinResponse from synapse.http.types import QueryParams -from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace +from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -587,6 +587,10 @@ async def _check_sigs_and_hash_and_fetch( Returns: A list of PDUs that have valid signatures and hashes. """ + set_tag( + SynapseTags.RESULT_PREFIX + "pdus.length", + str(len(pdus)), + ) # We limit how many PDUs we check at once, as if we try to do hundreds # of thousands of PDUs at once we see large memory spikes. @@ -607,6 +611,8 @@ async def _execute(pdu: EventBase) -> None: return valid_pdus + @trace + @tag_args async def _check_sigs_and_hash_and_fetch_one( self, pdu: EventBase, @@ -643,6 +649,14 @@ async def _check_sigs_and_hash_and_fetch_one( pdu.event_id, e, ) + log_kv( + { + "message": "Signature on retrieved event was invalid. " + "Checking local store/orgin server", + "event_id": pdu.event_id, + "InvalidEventSignatureError": e, + } + ) # Check local db. res = await self.store.get_event( diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index f678b52cb4f5..9ba77e20d261 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -168,6 +168,11 @@ def should_reject(self) -> bool: (either sleeping or in the ready queue). """ queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) + logger.info( + "should_reject queue_size=%s self.reject_limit=%s", + queue_size, + self.reject_limit, + ) return queue_size > self.reject_limit def should_sleep(self) -> bool: From 9753ac33bc9148e79f6e2acf357f1a82a985c310 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Aug 2022 18:04:49 -0500 Subject: [PATCH 2/5] Add changelog --- changelog.d/13588.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13588.misc diff --git a/changelog.d/13588.misc b/changelog.d/13588.misc new file mode 100644 index 000000000000..eca1416ceb1f --- /dev/null +++ b/changelog.d/13588.misc @@ -0,0 +1 @@ +Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls for understandable traces in Jaeger. From 2ee439b95e0e621200c286c845897f5901af1984 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Aug 2022 18:11:26 -0500 Subject: [PATCH 3/5] Remove unrelated debug log See https://github.com/matrix-org/synapse/pull/13588#discussion_r951990951 --- synapse/util/ratelimitutils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 9ba77e20d261..f678b52cb4f5 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -168,11 +168,6 @@ def should_reject(self) -> bool: (either sleeping or in the ready queue). """ queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) - logger.info( - "should_reject queue_size=%s self.reject_limit=%s", - queue_size, - self.reject_limit, - ) return queue_size > self.reject_limit def should_sleep(self) -> bool: From af6bdff7d7f167850d83b9093b750c318152db34 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Aug 2022 18:24:01 -0500 Subject: [PATCH 4/5] Fix lints and cleanup - https://github.com/matrix-org/synapse/pull/13588#discussion_r951990446 - https://github.com/matrix-org/synapse/pull/13588#discussion_r951990951 --- synapse/crypto/event_signing.py | 2 +- synapse/events/spamcheck.py | 2 +- synapse/federation/federation_base.py | 2 +- synapse/federation/federation_client.py | 5 ++++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 7c5a21ff4c8b..23b799ac3250 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -28,7 +28,7 @@ from synapse.api.room_versions import RoomVersion from synapse.events import EventBase from synapse.events.utils import prune_event, prune_event_dict -from synapse.logging.tracing import trace +from synapse.logging.opentracing import trace from synapse.types import JsonDict logger = logging.getLogger(__name__) diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index c691ef5973e4..623a2c71eadb 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -32,7 +32,7 @@ import synapse from synapse.api.errors import Codes -from synapse.logging.tracing import trace +from synapse.logging.opentracing import trace from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index dcd0a4b425df..4269a98db2db 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -23,7 +23,7 @@ from synapse.events import EventBase, make_event_from_dict from synapse.events.utils import prune_event, validate_canonicaljson from synapse.http.servlet import assert_params_in_dict -from synapse.logging.tracing import log_kv, trace +from synapse.logging.opentracing import log_kv, trace from synapse.types import JsonDict, get_domain_from_id if TYPE_CHECKING: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 5e6ac241462d..8aa7c52621a4 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -595,7 +595,7 @@ async def _check_sigs_and_hash_and_fetch( # We limit how many PDUs we check at once, as if we try to do hundreds # of thousands of PDUs at once we see large memory spikes. - valid_pdus = [] + valid_pdus: List[EventBase] = [] async def _execute(pdu: EventBase) -> None: valid_pdu = await self._check_sigs_and_hash_and_fetch_one( @@ -663,6 +663,9 @@ async def _check_sigs_and_hash_and_fetch_one( pdu.event_id, allow_rejected=True, allow_none=True ) + # If the PDU fails its signature check and we don't have it in our + # database, we then request it from sender's server (if that is not the + # same as `origin`). pdu_origin = get_domain_from_id(pdu.sender) if not res and pdu_origin != origin: try: From 7224ad8a65c60d3bcb51ab5ad686d8f3b00db060 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Aug 2022 20:12:30 -0500 Subject: [PATCH 5/5] Fix typo --- synapse/federation/federation_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 8aa7c52621a4..7ee2974bb155 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -645,14 +645,14 @@ async def _check_sigs_and_hash_and_fetch_one( except InvalidEventSignatureError as e: logger.warning( "Signature on retrieved event %s was invalid (%s). " - "Checking local store/orgin server", + "Checking local store/origin server", pdu.event_id, e, ) log_kv( { "message": "Signature on retrieved event was invalid. " - "Checking local store/orgin server", + "Checking local store/origin server", "event_id": pdu.event_id, "InvalidEventSignatureError": e, }