From d2e2aa7bf4f609ac552b4676387547c07e24b30b Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 24 Jun 2021 01:32:24 -0500
Subject: [PATCH 01/53] Make historical messages available to federated servers

Part of MSC2716: https://github.com/matrix-org/matrix-doc/pull/2716

Follow-up to https://github.com/matrix-org/synapse/pull/9247
---
 synapse/handlers/federation.py                     |  1 +
 .../databases/main/event_federation.py             |  2 +
 synapse/storage/databases/main/events.py           | 35 ++++++++++++++++
 .../delta/59/14insertion_event_lookups.sql         | 40 +++++++++++++++++++
 4 files changed, 78 insertions(+)
 create mode 100644 synapse/storage/schema/main/delta/59/14insertion_event_lookups.sql

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 1b566dbf2dad..5bf3e4018dab 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1052,6 +1052,7 @@ async def maybe_backfill(
         with (await self._room_backfill.queue(room_id)):
             return await self._maybe_backfill_inner(room_id, current_depth, limit)

+    # Todo
     async def _maybe_backfill_inner(
         self, room_id: str, current_depth: int, limit: int
     ) -> bool:
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index c0ea44555024..9483800fb22b 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -663,6 +663,8 @@ def get_oldest_events_with_depth_in_room_txn(self, txn, room_id):
             " ON g.event_id = e.event_id"
             " INNER JOIN event_backward_extremities as b"
             " ON g.prev_event_id = b.event_id"
+            " INNER JOIN insertion_event_extremeties as i"
+            " ON g.event_id = i.insertion_prev_event_id"
             " WHERE b.room_id = ? AND g.is_state is ?"
             " GROUP BY b.event_id"
         )
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 897fa06639cb..4aca0e8bca51 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1504,6 +1504,8 @@ def _update_metadata_tables_txn(

         self._handle_event_relations(txn, event)

+        self._handle_marker_event(txn, event)
+
         # Store the labels for this event.
         labels = event.content.get(EventContentFields.LABELS)
         if labels:
@@ -1756,6 +1758,39 @@ def _handle_event_relations(self, txn, event):
         if rel_type == RelationTypes.REPLACE:
             txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,))

+    def _handle_marker_event(self, txn, event):
+        """Handles inserting insertion extremities during persistence of marker events
+
+        Args:
+            txn
+            event (EventBase)
+        """
+
+        if event.type != EventTypes.MSC2716_MARKER:
+            # Not a marker event
+            return
+
+        insertion_event_id = event.content.get(
+            EventContentFields.MSC2716_MARKER_INSERTION
+        )
+        insertion_prev_event_ids = event.content.get(
+            EventContentFields.MSC2716_MARKER_INSERTION_PREV_EVENTS
+        )
+        if not insertion_event_id or not insertion_prev_event_ids:
+            # Invalid marker event
+            return
+
+        for prev_event_id in insertion_prev_event_ids:
+            self.db_pool.simple_insert_txn(
+                txn,
+                table="insertion_event_extremeties",
+                values={
+                    "insertion_event_id": insertion_event_id,
+                    "room_id": event.room_id,
+                    "insertion_prev_event_id": prev_event_id,
+                },
+            )
+
     def _handle_redaction(self, txn, redacted_event_id):
         """Handles receiving a redaction and checking whether we need to remove
         any redacted relations from the database.
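The marker events consumed by `_handle_marker_event` above are ordinary room events whose content points back at an insertion event somewhere in history. A minimal sketch of that content, assuming the `org.matrix.msc2716.*` unstable field names behind the `EventContentFields` constants referenced in the handler (the exact strings live in synapse.api.constants and are an assumption here):

    # Hypothetical marker event content; field strings assume the MSC2716
    # unstable prefix and are illustrative only.
    marker_content = {
        "org.matrix.msc2716.marker.insertion": "$insertionEventId",
        "org.matrix.msc2716.marker.insertion_prev_events": ["$prevEventA"],
    }
    # The handler writes one insertion_event_extremeties row per entry in
    # insertion_prev_events, keyed on the room and the insertion event.
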
diff --git a/synapse/storage/schema/main/delta/59/14insertion_event_lookups.sql b/synapse/storage/schema/main/delta/59/14insertion_event_lookups.sql
new file mode 100644
index 000000000000..58b71f1bc411
--- /dev/null
+++ b/synapse/storage/schema/main/delta/59/14insertion_event_lookups.sql
@@ -0,0 +1,40 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add a table that keeps track of "insertion" events back in the history
+-- when we get a "marker" event over the "live" timeline. When navigating the DAG
+-- and we hit an event which matches `insertion_prev_event_id`, it should backfill
+-- the "insertion" event and start navigating from there.
+
+
+CREATE TABLE IF NOT EXISTS insertion_event_extremeties(
+    insertion_event_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    insertion_prev_event_id TEXT NOT NULL,
+    UNIQUE (insertion_event_id, room_id, room_id, insertion_prev_event_id)
+);
+
+CREATE INDEX IF NOT EXISTS insertion_event_extremeties_insertion_room_id ON insertion_event_extremeties(room_id);
+CREATE INDEX IF NOT EXISTS insertion_event_extremeties_insertion_event_id ON insertion_event_extremeties(insertion_event_id);
+CREATE INDEX IF NOT EXISTS insertion_event_extremeties_insertion_prev_event_id ON insertion_event_extremeties(insertion_prev_event_id);
+
+CREATE TABLE IF NOT EXISTS chunk_connections(
+    event_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    chunk_id TEXT NOT NULL,
+    UNIQUE (event_id, room_id)
+);
+
+CREATE INDEX IF NOT EXISTS chunk_connections_insertion_chunk_id ON chunk_connections(chunk_id);

From 2d942ec0c1bab07d22852a65e9b95e9ee4bd1227 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 24 Jun 2021 19:32:18 -0500
Subject: [PATCH 02/53] Debug messages not available on federation

---
 scripts-dev/complement.sh                          | 2 +-
 synapse/handlers/federation.py                     | 9 +++++++++
 synapse/storage/databases/main/event_federation.py | 5 +++--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index ba060104c3aa..1e3bf357f797 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then
 fi

 # Run the tests!
-go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests
+go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_joining_on_federated_server
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 5bf3e4018dab..e0ab9eca48d0 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1057,6 +1057,7 @@ async def _maybe_backfill_inner(
         self, room_id: str, current_depth: int, limit: int
     ) -> bool:
         extremities = await self.store.get_oldest_events_with_depth_in_room(room_id)
+        logger.info("_maybe_backfill_inner extremities=%s", extremities)

         if not extremities:
             logger.debug("Not backfilling as no extremeties found.")
@@ -2161,8 +2162,16 @@ async def on_backfill_request(
         limit = min(limit, 100)

         events = await self.store.get_backfill_events(room_id, pdu_list, limit)
+        logger.info(
+            "on_backfill_request get_backfill_events events(%d)=%s", len(events), events
+        )

         events = await filter_events_for_server(self.storage, origin, events)
+        logger.info(
+            "on_backfill_request filter_events_for_server events(%d)=%s",
+            len(events),
+            events,
+        )

         return events
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 9483800fb22b..522f1f364e95 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -663,8 +663,9 @@ def get_oldest_events_with_depth_in_room_txn(self, txn, room_id):
             " ON g.event_id = e.event_id"
             " INNER JOIN event_backward_extremities as b"
             " ON g.prev_event_id = b.event_id"
-            " INNER JOIN insertion_event_extremeties as i"
-            " ON g.event_id = i.insertion_prev_event_id"
+            # TODO
+            # " INNER JOIN insertion_event_extremeties as i"
+            # " ON g.event_id = i.insertion_prev_event_id"
             " WHERE b.room_id = ? AND g.is_state is ?"
             " GROUP BY b.event_id"
         )

From 38bcf13e1e49b2a79aedf0e1025b0b42c249e0b7 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Fri, 25 Jun 2021 01:04:38 -0500
Subject: [PATCH 03/53] Add base starting insertion point when no chunk ID is provided

---
 scripts-dev/complement.sh      |  2 +-
 synapse/events/utils.py        | 14 +++----
 synapse/rest/client/v1/room.py | 76 +++++++++++++++++++++++++---------
 3 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index 1e3bf357f797..0b00e2f8ce9a 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then
 fi

 # Run the tests!
-go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_joining_on_federated_server
+go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state_in_correct_order
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index ec96999e4e77..9c115758e9b4 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -253,13 +253,13 @@ def format_event_for_client_v1(d):

 def format_event_for_client_v2(d):
     drop_keys = (
-        "auth_events",
-        "prev_events",
-        "hashes",
-        "signatures",
-        "depth",
-        "origin",
-        "prev_state",
+        # "auth_events",
+        # "prev_events",
+        # "hashes",
+        # "signatures",
+        # "depth",
+        # "origin",
+        # "prev_state",
     )
     for key in drop_keys:
         d.pop(key, None)
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 92ebe838fd84..25af9cd4292d 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -349,6 +349,30 @@ async def inherit_depth_from_prev_ids(self, prev_event_ids) -> int:

         return depth

+    def _create_insertion_event_dict(self, sender: str, origin_server_ts: int):
+        """
+        Creates an event dict for an "insertion" event with the proper fields
+        and a random chunk ID.
+
+        Args:
+            sender: The event author MXID
+            origin_server_ts: Timestamp when the event was sent
+
+        Returns:
+            The new insertion event dict
+        """
+
+        next_chunk_id = random_string(64)
+        insertion_event = {
+            "type": EventTypes.MSC2716_INSERTION,
+            "sender": sender,
+            "content": {
+                EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id,
+                EventContentFields.MSC2716_HISTORICAL: True,
+            },
+            "origin_server_ts": origin_server_ts,
+        }
+
+        return insertion_event
+
     async def on_POST(self, request, room_id):
         requester = await self.auth.get_user_by_req(request, allow_guest=False)

@@ -449,30 +473,40 @@ async def on_POST(self, request, room_id):

         events_to_create = body["events"]

-        # If provided, connect the chunk to the last insertion point
-        # The chunk ID passed in comes from the chunk_id in the
-        # "insertion" event from the previous chunk.
+        # Figure out which chunk to connect to. If they passed in
+        # chunk_id_from_query let's use it. The chunk ID passed in comes
+        # from the chunk_id in the "insertion" event from the previous chunk.
+        last_event_in_chunk = events_to_create[-1]
+        chunk_id_to_connect_to = chunk_id_from_query
         if chunk_id_from_query:
-            last_event_in_chunk = events_to_create[-1]
-            last_event_in_chunk["content"][
-                EventContentFields.MSC2716_CHUNK_ID
-            ] = chunk_id_from_query
+            # TODO: Verify the chunk_id_from_query corresponds to an insertion event
+            pass
+        # Otherwise, create an insertion event to be based off of and connect
+        # to as a starting point.
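+        # (Illustration: with no chunk ID given, this is the first chunk of a
+        # new backfill run, so the insertion event created below becomes the
+        # attachment point that the next chunk's chunk_id can reference.)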
+        else:
+            base_insertion_event = self._create_insertion_event_dict(
+                sender=requester.user.to_string(),
+                origin_server_ts=last_event_in_chunk["origin_server_ts"],
+            )
+            events_to_create.append(base_insertion_event)
+            chunk_id_to_connect_to = base_insertion_event["content"][
+                EventContentFields.MSC2716_NEXT_CHUNK_ID
+            ]
+
+        # Connect this current chunk to the insertion event from the previous chunk
+        last_event_in_chunk["content"][
+            EventContentFields.MSC2716_CHUNK_ID
+        ] = chunk_id_to_connect_to

-        # Add an "insertion" event to the start of each chunk (next to the oldest
+        # Add an "insertion" event to the start of each chunk (next to the oldest-in-time
         # event in the chunk) so the next chunk can be connected to this one.
-        next_chunk_id = random_string(64)
-        insertion_event = {
-            "type": EventTypes.MSC2716_INSERTION,
-            "sender": requester.user.to_string(),
-            "content": {
-                EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id,
-                EventContentFields.MSC2716_HISTORICAL: True,
-            },
+        insertion_event = self._create_insertion_event_dict(
+            sender=requester.user.to_string(),
             # Since the insertion event is put at the start of the chunk,
-            # where the oldest event is, copy the origin_server_ts from
+            # where the oldest-in-time event is, copy the origin_server_ts from
             # the first event we're inserting
-            "origin_server_ts": events_to_create[0]["origin_server_ts"],
-        }
+            origin_server_ts=events_to_create[0]["origin_server_ts"],
+        )

         # Prepend the insertion event to the start of the chunk
         events_to_create = [insertion_event] + events_to_create
@@ -536,7 +570,9 @@ async def on_POST(self, request, room_id):
         return 200, {
             "state_events": auth_event_ids,
             "events": event_ids,
-            "next_chunk_id": next_chunk_id,
+            "next_chunk_id": insertion_event["content"][
+                EventContentFields.MSC2716_NEXT_CHUNK_ID
+            ],
         }

     def on_GET(self, request, room_id):

From e405a23f48f9b2ac74ed8d9bcb8df496f18be956 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 29 Jun 2021 17:03:54 -0500
Subject: [PATCH 04/53] Fix messages from multiple senders in historical chunk

Follow-up to https://github.com/matrix-org/synapse/pull/9247

Part of MSC2716: https://github.com/matrix-org/matrix-doc/pull/2716

---

Previously, Synapse would throw a 403, `Cannot force another user to join.`,
because we were trying to use `?user_id` from a single virtual user which did
not match with messages from other users in the chunk.
---
 synapse/event_auth.py          | 10 ++++++++++
 synapse/rest/client/v1/room.py | 27 +++++++++++++++++++++------
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 33d7c6024147..4c8268647ab9 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -342,7 +342,17 @@ def _is_membership_change_allowed(
         #  * They are accepting a previously sent invitation.
         #  * They are already joined (it's a NOOP).
         #  * The room is public or restricted.
+        logger.info(
+            "check join aewffaewafewf %s %s",
+            event.user_id,
+            target_user_id,
+        )
         if event.user_id != target_user_id:
+            logger.error(
+                "Cannot force another user to join aewffaewafewf %s %s",
+                event.user_id,
+                target_user_id,
+            )
             raise AuthError(403, "Cannot force another user to join.")
         elif target_banned:
             raise AuthError(403, "You are banned from this room")
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 25af9cd4292d..3470813fa041 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 """ This module contains REST servlets to do with rooms: /rooms/<paths> """
+import copy
 import logging
 import re
 from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
@@ -47,6 +48,7 @@
 from synapse.streams.config import PaginationConfig
 from synapse.types import (
     JsonDict,
+    Requester,
     RoomAlias,
     RoomID,
     StreamToken,
@@ -309,7 +311,14 @@ def __init__(self, hs):
         self.room_member_handler = hs.get_room_member_handler()
         self.auth = hs.get_auth()

-    async def inherit_depth_from_prev_ids(self, prev_event_ids) -> int:
+    def _copy_requester_and_override_user_id(self, requester, new_user_id):
+        serialized_requester = requester.serialize()
+        serialized_requester["user_id"] = new_user_id
+        new_requester = Requester.deserialize(self.store, serialized_requester)
+
+        return new_requester
+
+    async def _inherit_depth_from_prev_ids(self, prev_event_ids) -> int:
         (
             most_recent_prev_event_id,
             most_recent_prev_event_depth,
@@ -438,7 +447,9 @@ async def on_POST(self, request, room_id):
             if event_dict["type"] == EventTypes.Member:
                 membership = event_dict["content"].get("membership", None)
                 event_id, _ = await self.room_member_handler.update_membership(
-                    requester,
+                    self._copy_requester_and_override_user_id(
+                        requester, state_event["sender"]
+                    ),
                     target=UserID.from_string(event_dict["state_key"]),
                     room_id=room_id,
                     action=membership,
@@ -458,7 +469,9 @@ async def on_POST(self, request, room_id):
                     event,
                     _,
                 ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                    requester,
+                    self._copy_requester_and_override_user_id(
+                        requester, state_event["sender"]
+                    ),
                     event_dict,
                     outlier=True,
                     prev_event_ids=[fake_prev_event_id],
@@ -510,7 +523,9 @@ async def on_POST(self, request, room_id):
         # Prepend the insertion event to the start of the chunk
         events_to_create = [insertion_event] + events_to_create

-        inherited_depth = await self.inherit_depth_from_prev_ids(prev_events_from_query)
+        inherited_depth = await self._inherit_depth_from_prev_ids(
+            prev_events_from_query
+        )

         event_ids = []
         prev_event_ids = prev_events_from_query
@@ -532,7 +547,7 @@ async def on_POST(self, request, room_id):
             }

             event, context = await self.event_creation_handler.create_event(
-                requester,
+                self._copy_requester_and_override_user_id(requester, ev["sender"]),
                 event_dict,
                 prev_event_ids=event_dict.get("prev_events"),
                 auth_event_ids=auth_event_ids,
@@ -562,7 +577,7 @@ async def on_POST(self, request, room_id):
         # where topological_ordering is just depth.
         for (event, context) in reversed(events_to_persist):
             ev = await self.event_creation_handler.handle_new_client_event(
-                requester=requester,
+                self._copy_requester_and_override_user_id(requester, event["sender"]),
                 event=event,
                 context=context,
             )

From 36f156588447ec1a8d87b46d276a7e1e85d6617d Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 29 Jun 2021 17:14:40 -0500
Subject: [PATCH 05/53] Remove debug lines

---
 synapse/event_auth.py          | 10 ----------
 synapse/rest/client/v1/room.py |  1 -
 2 files changed, 11 deletions(-)

diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 4c8268647ab9..33d7c6024147 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -342,17 +342,7 @@ def _is_membership_change_allowed(
         #  * They are accepting a previously sent invitation.
         #  * They are already joined (it's a NOOP).
         #  * The room is public or restricted.
-        logger.info(
-            "check join aewffaewafewf %s %s",
-            event.user_id,
-            target_user_id,
-        )
         if event.user_id != target_user_id:
-            logger.error(
-                "Cannot force another user to join aewffaewafewf %s %s",
-                event.user_id,
-                target_user_id,
-            )
             raise AuthError(403, "Cannot force another user to join.")
         elif target_banned:
             raise AuthError(403, "You are banned from this room")
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 3470813fa041..5c8961509ed3 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 """ This module contains REST servlets to do with rooms: /rooms/<paths> """
-import copy
 import logging
 import re
 from typing import TYPE_CHECKING, Dict, List, Optional, Tuple

From 05d6c513f60ed738b63113299c4d6ba180b658fb Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 6 Jul 2021 23:37:54 -0500
Subject: [PATCH 06/53] Messing with selecting insertion event extremities

---
 scripts-dev/complement.sh                          |  2 +-
 synapse/handlers/federation.py                     |  4 +-
 .../databases/main/event_federation.py             | 43 ++++++++++++++-----
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index 0b00e2f8ce9a..1e3bf357f797 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then
 fi

 # Run the tests!
-go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state_in_correct_order
+go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_joining_on_federated_server
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index e0ab9eca48d0..ecf401f9387f 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1057,7 +1057,9 @@ async def _maybe_backfill_inner(
         self, room_id: str, current_depth: int, limit: int
     ) -> bool:
         extremities = await self.store.get_oldest_events_with_depth_in_room(room_id)
-        logger.info("_maybe_backfill_inner extremities=%s", extremities)
+        logger.info(
+            "_maybe_backfill_inner extremities(%d)=%s", len(extremities), extremities
+        )

         if not extremities:
             logger.debug("Not backfilling as no extremeties found.")
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 522f1f364e95..eda940c31b67 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -657,20 +657,42 @@ async def get_oldest_events_with_depth_in_room(self, room_id):
         )

     def get_oldest_events_with_depth_in_room_txn(self, txn, room_id):
+        # sql = (
+        #     "SELECT b.event_id, MAX(e.depth) FROM events as e"
+        #     " INNER JOIN event_edges as g"
+        #     " ON g.event_id = e.event_id"
+        #     " INNER JOIN event_backward_extremities as b"
+        #     " ON g.prev_event_id = b.event_id"
+        #     # TODO
+        #     # " INNER JOIN insertion_event_extremeties as i"
+        #     # " ON g.event_id = i.insertion_prev_event_id"
+        #     " WHERE b.room_id = ? AND g.is_state is ?"
+        #     " GROUP BY b.event_id"
+        # )
+        # txn.execute(sql, (room_id, False))
+
+        sqlAsdf = "SELECT * FROM insertion_event_extremeties as i"
+        txn.execute(sqlAsdf)
+        logger.info("wfeafewawafeawg %s", dict(txn))
+
+        sqlAsdf = "SELECT * FROM insertion_event_extremeties as i WHERE i.room_id = ?"
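+        # Debug: dump this room's insertion extremity rows to the log while
+        # chasing why the historical messages are not visible over federation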
+        txn.execute(sqlAsdf, (room_id,))
+        logger.info("awfeawefw %s", dict(txn))
+
         sql = (
-            "SELECT b.event_id, MAX(e.depth) FROM events as e"
-            " INNER JOIN event_edges as g"
-            " ON g.event_id = e.event_id"
-            " INNER JOIN event_backward_extremities as b"
-            " ON g.prev_event_id = b.event_id"
+            "SELECT i.insertion_event_id, MAX(e.depth) FROM events as e"
+            # " INNER JOIN event_edges as g"
+            # " ON g.event_id = e.event_id"
+            # " INNER JOIN event_backward_extremities as b"
+            # " ON g.prev_event_id = b.event_id"
             # TODO
-            # " INNER JOIN insertion_event_extremeties as i"
-            # " ON g.event_id = i.insertion_prev_event_id"
-            " WHERE b.room_id = ? AND g.is_state is ?"
-            " GROUP BY b.event_id"
+            " INNER JOIN insertion_event_extremeties as i"
+            " ON e.event_id = i.insertion_prev_event_id"
+            " WHERE i.room_id = ?"
+            " GROUP BY i.insertion_event_id"
         )

-        txn.execute(sql, (room_id, False))
+        txn.execute(sql, (room_id,))

         return dict(txn)
@@ -923,6 +945,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit):

         # We want to make sure that we do a breadth-first, "depth" ordered
         # search.
+        # TODO
         query = (
             "SELECT depth, prev_event_id FROM event_edges"
             " INNER JOIN events"

From dfad8a880dca01a915141450ab19aab54519c96c Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 7 Jul 2021 15:44:54 -0500
Subject: [PATCH 07/53] Move db schema change to new version

---
 synapse/storage/schema/__init__.py                          | 2 +-
 .../01insertion_event_lookups.sql}                          | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename synapse/storage/schema/main/delta/{59/14insertion_event_lookups.sql => 61/01insertion_event_lookups.sql} (100%)

diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 0a53b73ccc4e..dd431d64266a 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-SCHEMA_VERSION = 60
+SCHEMA_VERSION = 61
 """Represents the expectations made by the codebase about the database schema

 This should be incremented whenever the codebase changes its requirements on the
diff --git a/synapse/storage/schema/main/delta/59/14insertion_event_lookups.sql b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql
similarity index 100%
rename from synapse/storage/schema/main/delta/59/14insertion_event_lookups.sql
rename to synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql

From 7d850dbecac5f7ccce88c51fd0394f395b416cf1 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 7 Jul 2021 16:22:30 -0500
Subject: [PATCH 08/53] Add more better comments

---
 synapse/rest/client/v1/room.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 5c8961509ed3..2c427a67ff9d 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -358,17 +358,18 @@ async def _inherit_depth_from_prev_ids(self, prev_event_ids) -> int:
         return depth

     def _create_insertion_event_dict(self, sender: str, origin_server_ts: int):
-        """
-        Creates an event dict for an "insertion" event with the proper fields
+        """Creates an event dict for an "insertion" event with the proper fields
         and a random chunk ID.
+
         Args:
             sender: The event author MXID
             origin_server_ts: Timestamp when the event was sent
+
         Returns:
             The new insertion event dict
         """

-        next_chunk_id = random_string(64)
+        next_chunk_id = random_string(8)
         insertion_event = {
             "type": EventTypes.MSC2716_INSERTION,
             "sender": sender,
@@ -493,8 +494,13 @@ async def on_POST(self, request, room_id):
         if chunk_id_from_query:
             # TODO: Verify the chunk_id_from_query corresponds to an insertion event
             pass
-        # Otherwise, create an insertion event to be based off of and connect
-        # to as a starting point.
+        # Otherwise, create an insertion event to act as a starting point.
+        #
+        # We don't always have an insertion event to start hanging more history
+        # off of (ideally there would be one in the main DAG, but that's not the
+        # case if we're wanting to add history to e.g. existing rooms without
+        # an insertion event), in which case we just create a new insertion event
+        # that can then get pointed to by a "marker" event later.
         else:
             base_insertion_event = self._create_insertion_event_dict(

From 164dee45b8010c01382e5c768b3bcdff8267f466 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 7 Jul 2021 16:47:16 -0500
Subject: [PATCH 09/53] Make a fake requester with just what we need

See https://github.com/matrix-org/synapse/pull/10276#discussion_r660999080

---
 synapse/rest/client/v1/room.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 2c427a67ff9d..5b7b7fd0919b 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -47,12 +47,12 @@
 from synapse.streams.config import PaginationConfig
 from synapse.types import (
     JsonDict,
-    Requester,
     RoomAlias,
     RoomID,
     StreamToken,
     ThirdPartyInstanceID,
     UserID,
+    create_requester,
 )
 from synapse.util import json_decoder
 from synapse.util.stringutils import parse_and_validate_server_name, random_string
@@ -310,13 +310,6 @@ def __init__(self, hs):
         self.room_member_handler = hs.get_room_member_handler()
         self.auth = hs.get_auth()

-    def _copy_requester_and_override_user_id(self, requester, new_user_id):
-        serialized_requester = requester.serialize()
-        serialized_requester["user_id"] = new_user_id
-        new_requester = Requester.deserialize(self.store, serialized_requester)
-
-        return new_requester
-
     async def _inherit_depth_from_prev_ids(self, prev_event_ids) -> int:
         (
             most_recent_prev_event_id,
             most_recent_prev_event_depth,
@@ -438,8 +431,8 @@ async def on_POST(self, request, room_id):
             if event_dict["type"] == EventTypes.Member:
                 membership = event_dict["content"].get("membership", None)
                 event_id, _ = await self.room_member_handler.update_membership(
-                    self._copy_requester_and_override_user_id(
-                        requester, state_event["sender"]
+                    create_requester(
+                        state_event["sender"], app_service=requester.app_service
                     ),
                     target=UserID.from_string(event_dict["state_key"]),
                     room_id=room_id,
                     action=membership,
@@ -458,8 +451,8 @@ async def on_POST(self, request, room_id):
                     event,
                     _,
                 ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                    self._copy_requester_and_override_user_id(
-                        requester, state_event["sender"]
+                    create_requester(
+                        state_event["sender"], app_service=requester.app_service
                     ),
                     event_dict,
                     outlier=True,
                     prev_event_ids=[fake_prev_event_id],
@@ -532,7 +525,7 @@ async def on_POST(self, request, room_id):
             }

             event, context = await self.event_creation_handler.create_event(
-                self._copy_requester_and_override_user_id(requester, ev["sender"]),
+                create_requester(ev["sender"], app_service=requester.app_service),
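+                # Illustrative note: each event in the chunk can have a different
+                # sender, so a per-event requester avoids the 403
+                # "Cannot force another user to join." seen earlier.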
                 event_dict,
                 prev_event_ids=event_dict.get("prev_events"),
                 auth_event_ids=auth_event_ids,
@@ -562,7 +555,7 @@ async def on_POST(self, request, room_id):
         # where topological_ordering is just depth.
         for (event, context) in reversed(events_to_persist):
             ev = await self.event_creation_handler.handle_new_client_event(
-                self._copy_requester_and_override_user_id(requester, event["sender"]),
+                create_requester(event["sender"], app_service=requester.app_service),
                 event=event,
                 context=context,
             )

From 04b1f7ec022ea1470b66882731a0e2af89cf0148 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 7 Jul 2021 20:56:21 -0500
Subject: [PATCH 10/53] Store insertion events in table

---
 synapse/storage/databases/main/events.py | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 4aca0e8bca51..760a0f35b53b 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1504,6 +1504,7 @@ def _update_metadata_tables_txn(

         self._handle_event_relations(txn, event)

+        self._handle_insertion_event(txn, event)
         self._handle_marker_event(txn, event)

         # Store the labels for this event.
@@ -1758,6 +1759,31 @@ def _handle_event_relations(self, txn, event):
         if rel_type == RelationTypes.REPLACE:
             txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,))

+    def _handle_insertion_event(self, txn, event):
+        """Handles inserting insertion extremities during persistence of insertion events
+
+        Args:
+            txn
+            event (EventBase)
+        """
+
+        if event.type != EventTypes.MSC2716_INSERTION:
+            # Not an insertion event
+            return
+
+        logger.info("_handle_insertion_event %s", event)
+
+        for prev_event_id in event.prev_event_ids:
+            self.db_pool.simple_insert_txn(
+                txn,
+                table="insertion_event_extremeties",
+                values={
+                    "insertion_event_id": event.event_id,
+                    "room_id": event.room_id,
+                    "insertion_prev_event_id": prev_event_id,
+                },
+            )
+
     def _handle_marker_event(self, txn, event):

From b703962095f8b1ed060f9bb75b8226fa09a3e6a5 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 7 Jul 2021 23:08:39 -0500
Subject: [PATCH 11/53] Make base insertion event float off on its own

See https://github.com/matrix-org/synapse/pull/10250#issuecomment-875711889

Conflicts:
	synapse/rest/client/v1/room.py
---
 synapse/handlers/message.py    |  8 +++++++
 synapse/rest/client/v1/room.py | 40 +++++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 66e40a915d04..61e6f4ef252c 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -518,6 +518,9 @@ async def create_event(
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
+            historical: Indicates whether the message is being inserted
+                back in time around some existing events. This is used to skip
+                a few checks and mark the event as backfilled.
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -772,6 +775,7 @@ async def create_and_send_nonmember_event(
         txn_id: Optional[str] = None,
         ignore_shadow_ban: bool = False,
         outlier: bool = False,
+        historical: bool = False,
         depth: Optional[int] = None,
     ) -> Tuple[EventBase, int]:
         """
@@ -799,6 +803,9 @@ async def create_and_send_nonmember_event(
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
+            historical: Indicates whether the message is being inserted
+                back in time around some existing events. This is used to skip
+                a few checks and mark the event as backfilled.
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -847,6 +854,7 @@ async def create_and_send_nonmember_event(
                 prev_event_ids=prev_event_ids,
                 auth_event_ids=auth_event_ids,
                 outlier=outlier,
+                historical=historical,
                 depth=depth,
             )
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 5b7b7fd0919b..6e69cfffef8e 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -350,12 +350,15 @@ async def _inherit_depth_from_prev_ids(self, prev_event_ids) -> int:

         return depth

-    def _create_insertion_event_dict(self, sender: str, origin_server_ts: int):
+    def _create_insertion_event_dict(
+        self, sender: str, room_id: str, origin_server_ts: int
+    ):
         """Creates an event dict for an "insertion" event with the proper fields
         and a random chunk ID.

         Args:
             sender: The event author MXID
+            room_id: The room ID that the event belongs to
             origin_server_ts: Timestamp when the event was sent

         Returns:
@@ -366,6 +369,7 @@ def _create_insertion_event_dict(
         insertion_event = {
             "type": EventTypes.MSC2716_INSERTION,
             "sender": sender,
+            "room_id": room_id,
             "content": {
                 EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id,
                 EventContentFields.MSC2716_HISTORICAL: True,
             },
@@ -479,11 +483,17 @@ async def on_POST(self, request, room_id):

         events_to_create = body["events"]

+        prev_event_ids = prev_events_from_query
+        inherited_depth = await self._inherit_depth_from_prev_ids(
+            prev_events_from_query
+        )
+
         # Figure out which chunk to connect to. If they passed in
         # chunk_id_from_query let's use it. The chunk ID passed in comes
         # from the chunk_id in the "insertion" event from the previous chunk.
         last_event_in_chunk = events_to_create[-1]
         chunk_id_to_connect_to = chunk_id_from_query
+        base_insertion_event = None
         if chunk_id_from_query:
             # TODO: Verify the chunk_id_from_query corresponds to an insertion event
             pass
         # Otherwise, create an insertion event to act as a starting point.
         #
         # We don't always have an insertion event to start hanging more history
         # off of (ideally there would be one in the main DAG, but that's not the
         # case if we're wanting to add history to e.g. existing rooms without
         # an insertion event), in which case we just create a new insertion event
         # that can then get pointed to by a "marker" event later.
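        # Roughly (sketch): chunk of history --chunk_id--> base insertion event
        # --prev_events--> prev_event_ids from the query, so the historical
        # chunk hangs off the existing DAG at exactly that point.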
else: - base_insertion_event = self._create_insertion_event_dict( + base_insertion_event_dict = self._create_insertion_event_dict( sender=requester.user.to_string(), + room_id=room_id, origin_server_ts=last_event_in_chunk["origin_server_ts"], ) - events_to_create.append(base_insertion_event) + base_insertion_event_dict["prev_events"] = prev_event_ids.copy() + + ( + base_insertion_event, + _, + ) = await self.event_creation_handler.create_and_send_nonmember_event( + requester, + base_insertion_event_dict, + prev_event_ids=base_insertion_event_dict.get("prev_events"), + auth_event_ids=auth_event_ids, + historical=True, + depth=inherited_depth, + ) + chunk_id_to_connect_to = base_insertion_event["content"][ EventContentFields.MSC2716_NEXT_CHUNK_ID ] @@ -513,6 +537,7 @@ async def on_POST(self, request, room_id): # event in the chunk) so the next chunk can be connected to this one. insertion_event = self._create_insertion_event_dict( sender=requester.user.to_string(), + room_id=room_id, # Since the insertion event is put at the start of the chunk, # where the oldest-in-time event is, copy the origin_server_ts from # the first event we're inserting @@ -521,12 +546,7 @@ async def on_POST(self, request, room_id): # Prepend the insertion event to the start of the chunk events_to_create = [insertion_event] + events_to_create - inherited_depth = await self._inherit_depth_from_prev_ids( - prev_events_from_query - ) - event_ids = [] - prev_event_ids = prev_events_from_query events_to_persist = [] for ev in events_to_create: assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) @@ -580,6 +600,10 @@ async def on_POST(self, request, room_id): context=context, ) + # Add the base_insertion_event to the bottom of the list we return + if base_insertion_event is not None: + event_ids.append(base_insertion_event.event_id) + return 200, { "state_events": auth_event_ids, "events": event_ids, From 8c205e55a99baafcadecda5d0d114c977747e476 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jul 2021 20:22:57 -0500 Subject: [PATCH 12/53] Validate that the app service can actually control the given user See https://github.com/matrix-org/synapse/pull/10276#issuecomment-876316455 Conflicts: synapse/rest/client/v1/room.py --- synapse/rest/client/v1/room.py | 54 +++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 6e69cfffef8e..a0d6904dd4fa 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -29,6 +29,9 @@ SynapseError, ) from synapse.api.filtering import Filter + + +from synapse.appservice import ApplicationService from synapse.events.utils import format_event_for_client_v2 from synapse.http.servlet import ( RestServlet, @@ -47,6 +50,7 @@ from synapse.streams.config import PaginationConfig from synapse.types import ( JsonDict, + Requester, RoomAlias, RoomID, StreamToken, @@ -379,6 +383,35 @@ def _create_insertion_event_dict( return insertion_event + async def _create_requester_from_app_service( + self, user_id: str, app_service: ApplicationService + ) -> Requester: + """Creates a new requester for the given user_id + and validates that the app service is allowed to control + the given user. 
+ + Args: + user_id: The author MXID that the app service is controlling + app_service: The app service that controls the user + + Returns: + Requester object + """ + + if app_service.sender == user_id: + pass + elif not app_service.is_interested_in_user(user_id): + raise AuthError( + 403, + "Application service cannot masquerade as this user (%s)." % user_id, + ) + elif not (await self.store.get_user_by_id(user_id)): + raise AuthError( + 403, "Application service has not registered this user (%s)" % user_id + ) + + return create_requester(user_id, app_service=app_service) + async def on_POST(self, request, room_id): requester = await self.auth.get_user_by_req(request, allow_guest=False) @@ -444,8 +477,8 @@ async def on_POST(self, request, room_id): if event_dict["type"] == EventTypes.Member: membership = event_dict["content"].get("membership", None) event_id, _ = await self.room_member_handler.update_membership( - create_requester( - state_event["sender"], app_service=requester.app_service + await self._create_requester_from_app_service( + state_event["sender"], requester.app_service ), target=UserID.from_string(event_dict["state_key"]), room_id=room_id, @@ -466,8 +499,8 @@ async def on_POST(self, request, room_id): event, _, ) = await self.event_creation_handler.create_and_send_nonmember_event( - create_requester( - state_event["sender"], app_service=requester.app_service + await self._create_requester_from_app_service( + state_event["sender"], requester.app_service ), event_dict, outlier=True, @@ -516,7 +549,10 @@ async def on_POST(self, request, room_id): base_insertion_event, _, ) = await self.event_creation_handler.create_and_send_nonmember_event( - requester, + await self._create_requester_from_app_service( + base_insertion_event_dict["sender"], + requester.app_service, + ), base_insertion_event_dict, prev_event_ids=base_insertion_event_dict.get("prev_events"), auth_event_ids=auth_event_ids, @@ -565,7 +601,9 @@ async def on_POST(self, request, room_id): } event, context = await self.event_creation_handler.create_event( - create_requester(ev["sender"], app_service=requester.app_service), + await self._create_requester_from_app_service( + ev["sender"], requester.app_service + ), event_dict, prev_event_ids=event_dict.get("prev_events"), auth_event_ids=auth_event_ids, @@ -595,7 +633,9 @@ async def on_POST(self, request, room_id): # where topological_ordering is just depth. 
for (event, context) in reversed(events_to_persist): ev = await self.event_creation_handler.handle_new_client_event( - create_requester(event["sender"], app_service=requester.app_service), + await self._create_requester_from_app_service( + event["sender"], requester.app_service + ), event=event, context=context, ) From 7b8b2d1b7bb5466791ca45dfdaf010fff081bffa Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jul 2021 20:36:02 -0500 Subject: [PATCH 13/53] Add some better comments on what we're trying to check for --- synapse/rest/client/v1/room.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index a0d6904dd4fa..7ff4ad6bf5c7 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -29,8 +29,6 @@ SynapseError, ) from synapse.api.filtering import Filter - - from synapse.appservice import ApplicationService from synapse.events.utils import format_event_for_client_v2 from synapse.http.servlet import ( @@ -398,13 +396,16 @@ async def _create_requester_from_app_service( Requester object """ + # It's ok if the app service is trying to use the sender from their registration if app_service.sender == user_id: pass + # Check to make sure the app service is allowed to control the user elif not app_service.is_interested_in_user(user_id): raise AuthError( 403, "Application service cannot masquerade as this user (%s)." % user_id, ) + # Check to make sure the user is already registered on the homeserver elif not (await self.store.get_user_by_id(user_id)): raise AuthError( 403, "Application service has not registered this user (%s)" % user_id From 4226165cc4851757f0d9cf1b49cea5c2f51e65a2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Jul 2021 14:34:38 -0500 Subject: [PATCH 14/53] Continue debugging --- synapse/handlers/federation.py | 4 ++- .../databases/main/event_federation.py | 28 +++++++++---------- synapse/storage/databases/main/events.py | 2 +- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f7b190a08d22..585fb6f5284f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2128,7 +2128,9 @@ async def on_backfill_request( events = await self.store.get_backfill_events(room_id, pdu_list, limit) logger.info( - "on_backfill_request get_backfill_events events(%d)=%s", len(events), events + "on_backfill_request get_backfill_events events(%d)=%s", + len(events), + [f'{ev.content.get("body")}: {ev.type} ({ev.event_id})' for ev in events], ) events = await filter_events_for_server(self.storage, origin, events) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 221ebbe0b482..8b88a01e1710 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -673,20 +673,6 @@ async def get_oldest_events_with_depth_in_room(self, room_id): ) def get_oldest_events_with_depth_in_room_txn(self, txn, room_id): - # sql = ( - # "SELECT b.event_id, MAX(e.depth) FROM events as e" - # " INNER JOIN event_edges as g" - # " ON g.event_id = e.event_id" - # " INNER JOIN event_backward_extremities as b" - # " ON g.prev_event_id = b.event_id" - # # TODO - # # " INNER JOIN insertion_event_extremeties as i" - # # " ON g.event_id = i.insertion_prev_event_id" - # " WHERE b.room_id = ? AND g.is_state is ?" 
- # " GROUP BY b.event_id" - # ) - # txn.execute(sql, (room_id, False)) - sqlAsdf = "SELECT * FROM insertion_event_extremeties as i" txn.execute(sqlAsdf) logger.info("wfeafewawafeawg %s", dict(txn)) @@ -710,6 +696,20 @@ def get_oldest_events_with_depth_in_room_txn(self, txn, room_id): txn.execute(sql, (room_id,)) + sql = ( + "SELECT b.event_id, MAX(e.depth) FROM events as e" + " INNER JOIN event_edges as g" + " ON g.event_id = e.event_id" + " INNER JOIN event_backward_extremities as b" + " ON g.prev_event_id = b.event_id" + # TODO + # " INNER JOIN insertion_event_extremeties as i" + # " ON g.event_id = i.insertion_prev_event_id" + " WHERE b.room_id = ? AND g.is_state is ?" + " GROUP BY b.event_id" + ) + txn.execute(sql, (room_id, False)) + return dict(txn) async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[str, int]: diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 760a0f35b53b..228dce91b0fd 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1773,7 +1773,7 @@ def _handle_insertion_event(self, txn, event): logger.info("_handle_insertion_event %s", event) - for prev_event_id in event.prev_event_ids: + for prev_event_id in event.prev_events: self.db_pool.simple_insert_txn( txn, table="insertion_event_extremeties", From baae5d86f561a49786adeb5e1e246ff2dbe7065c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Jul 2021 16:45:06 -0500 Subject: [PATCH 15/53] Share validation logic --- synapse/api/auth.py | 37 ++++++++++++++++++++++++++++++---- synapse/rest/client/v1/room.py | 27 +++++++------------------ 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 307f5f9a9463..42476a18e504 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -240,6 +240,37 @@ async def get_user_by_req( except KeyError: raise MissingClientTokenError() + async def validate_appservice_can_control_user_id( + self, app_service: ApplicationService, user_id: str + ): + """Validates that the app service is allowed to control + the given user. + + Args: + app_service: The app service that controls the user + user_id: The author MXID that the app service is controlling + + Raises: + AuthError: If the application service is not allowed to control the user + (user namespace regex does not match, wrong homeserver, etc) + or if the user has not been registered yet. + """ + + # It's ok if the app service is trying to use the sender from their registration + if app_service.sender == user_id: + pass + # Check to make sure the app service is allowed to control the user + elif not app_service.is_interested_in_user(user_id): + raise AuthError( + 403, + "Application service cannot masquerade as this user (%s)." 
% user_id, + ) + # Check to make sure the user is already registered on the homeserver + elif not (await self.store.get_user_by_id(user_id)): + raise AuthError( + 403, "Application service has not registered this user (%s)" % user_id + ) + async def _get_appservice_user_id( self, request: Request ) -> Tuple[Optional[str], Optional[ApplicationService]]: @@ -261,13 +292,11 @@ async def _get_appservice_user_id( return app_service.sender, app_service user_id = request.args[b"user_id"][0].decode("utf8") + await self.validate_appservice_can_control_user_id(app_service, user_id) + if app_service.sender == user_id: return app_service.sender, app_service - if not app_service.is_interested_in_user(user_id): - raise AuthError(403, "Application service cannot masquerade as this user.") - if not (await self.store.get_user_by_id(user_id)): - raise AuthError(403, "Application service has not registered this user") return user_id, app_service async def get_user_by_access_token( diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 7ff4ad6bf5c7..ebf4e3223089 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -381,7 +381,7 @@ def _create_insertion_event_dict( return insertion_event - async def _create_requester_from_app_service( + async def _create_requester_for_user_id_from_app_service( self, user_id: str, app_service: ApplicationService ) -> Requester: """Creates a new requester for the given user_id @@ -396,20 +396,7 @@ async def _create_requester_from_app_service( Requester object """ - # It's ok if the app service is trying to use the sender from their registration - if app_service.sender == user_id: - pass - # Check to make sure the app service is allowed to control the user - elif not app_service.is_interested_in_user(user_id): - raise AuthError( - 403, - "Application service cannot masquerade as this user (%s)." 
% user_id, - ) - # Check to make sure the user is already registered on the homeserver - elif not (await self.store.get_user_by_id(user_id)): - raise AuthError( - 403, "Application service has not registered this user (%s)" % user_id - ) + await self.auth.validate_appservice_can_control_user_id(app_service, user_id) return create_requester(user_id, app_service=app_service) @@ -478,7 +465,7 @@ async def on_POST(self, request, room_id): if event_dict["type"] == EventTypes.Member: membership = event_dict["content"].get("membership", None) event_id, _ = await self.room_member_handler.update_membership( - await self._create_requester_from_app_service( + await self._create_requester_for_user_id_from_app_service( state_event["sender"], requester.app_service ), target=UserID.from_string(event_dict["state_key"]), @@ -500,7 +487,7 @@ async def on_POST(self, request, room_id): event, _, ) = await self.event_creation_handler.create_and_send_nonmember_event( - await self._create_requester_from_app_service( + await self._create_requester_for_user_id_from_app_service( state_event["sender"], requester.app_service ), event_dict, @@ -550,7 +537,7 @@ async def on_POST(self, request, room_id): base_insertion_event, _, ) = await self.event_creation_handler.create_and_send_nonmember_event( - await self._create_requester_from_app_service( + await self._create_requester_for_user_id_from_app_service( base_insertion_event_dict["sender"], requester.app_service, ), @@ -602,7 +589,7 @@ async def on_POST(self, request, room_id): } event, context = await self.event_creation_handler.create_event( - await self._create_requester_from_app_service( + await self._create_requester_for_user_id_from_app_service( ev["sender"], requester.app_service ), event_dict, @@ -634,7 +621,7 @@ async def on_POST(self, request, room_id): # where topological_ordering is just depth. for (event, context) in reversed(events_to_persist): ev = await self.event_creation_handler.handle_new_client_event( - await self._create_requester_from_app_service( + await self._create_requester_for_user_id_from_app_service( event["sender"], requester.app_service ), event=event, From c05e43bf318ed76906091dd0c3ce3bd0bac7f9cd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Jul 2021 20:22:38 -0500 Subject: [PATCH 16/53] Add inserted historical messages to /backfill response --- .../databases/main/event_federation.py | 73 ++++++++++++++-- synapse/storage/databases/main/events.py | 85 ++++++++++++++----- .../delta/61/01insertion_event_lookups.sql | 24 ++++-- 3 files changed, 150 insertions(+), 32 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 8b88a01e1710..1b65f4413917 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -673,11 +673,11 @@ async def get_oldest_events_with_depth_in_room(self, room_id): ) def get_oldest_events_with_depth_in_room_txn(self, txn, room_id): - sqlAsdf = "SELECT * FROM insertion_event_extremeties as i" + sqlAsdf = "SELECT * FROM insertion_event_edges as i" txn.execute(sqlAsdf) logger.info("wfeafewawafeawg %s", dict(txn)) - sqlAsdf = "SELECT * FROM insertion_event_extremeties as i WHERE i.room_id = ?" + sqlAsdf = "SELECT * FROM insertion_event_edges as i WHERE i.room_id = ?" 
txn.execute(sqlAsdf, (room_id,)) logger.info("awfeawefw %s", dict(txn)) @@ -688,7 +688,7 @@ def get_oldest_events_with_depth_in_room_txn(self, txn, room_id): # " INNER JOIN event_backward_extremities as b" # " ON g.prev_event_id = b.event_id" # TODO - " INNER JOIN insertion_event_extremeties as i" + " INNER JOIN insertion_event_edges as i" " ON e.event_id = i.insertion_prev_event_id" " WHERE i.room_id = ?" " GROUP BY i.insertion_event_id" @@ -703,7 +703,7 @@ def get_oldest_events_with_depth_in_room_txn(self, txn, room_id): " INNER JOIN event_backward_extremities as b" " ON g.prev_event_id = b.event_id" # TODO - # " INNER JOIN insertion_event_extremeties as i" + # " INNER JOIN insertion_event_edges as i" # " ON g.event_id = i.insertion_prev_event_id" " WHERE b.room_id = ? AND g.is_state is ?" " GROUP BY b.event_id" @@ -961,16 +961,50 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # We want to make sure that we do a breadth-first, "depth" ordered # search. - # TODO + # Look for the prev_event_id connected to the given event_id query = ( "SELECT depth, prev_event_id FROM event_edges" + # Get the depth of the prev_event_id from the events table " INNER JOIN events" " ON prev_event_id = events.event_id" + # Find an event which matches the given event_id " WHERE event_edges.event_id = ?" " AND event_edges.is_state = ?" " LIMIT ?" ) + # Look for the "insertion" events connected to the given event_id + # TODO: Do we need to worry about selecting only from the given room_id? The other query above doesn't + connected_insertion_event_query = ( + "SELECT e.depth, i.insertion_event_id FROM insertion_event_edges AS i" + # Get the depth of the insertion event from the events table + " INNER JOIN events AS e" + " ON e.event_id = i.insertion_event_id" + # Find an insertion event which points via prev_events to the given event_id + " WHERE i.insertion_prev_event_id = ?" + " LIMIT ?" + ) + + # Find any chunk connections of a given insertion event + # TODO: Do we need to worry about selecting only from the given room_id? The other query above doesn't + chunk_connection_query = ( + "SELECT e.depth, c.event_id FROM insertion_events AS i" + # Find the chunk that connects to the given insertion event + " INNER JOIN chunk_edges AS c" + " ON i.next_chunk_id = c.chunk_id" + # Get the depth of the chunk start event from the events table + " INNER JOIN events AS e" + " ON e.event_id = c.event_id" + # Find an insertion event which matches the given event_id + " WHERE i.insertion_event_id = ?" + " LIMIT ?" + ) + + # In a PriorityQueue, the lowest valued entries are retrieved first. + # We're using depth as the priority in the queue. + # Depth is lowest at the oldest-in-time message and highest and + # newest-in-time message. We add events to the queue with a negative depth so that + # we process the newest-in-time messages first going backwards in time. 
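+        # For example (illustration): events at depths 3, 7, 5 are queued as
+        # (-3, a), (-7, b), (-5, c) and dequeue as b (depth 7), then c (5),
+        # then a (3), i.e. newest-in-time first.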
         queue = PriorityQueue()

         for event_id in event_list:
@@ -996,9 +1030,36 @@ def _get_backfill_events(self, txn, room_id, event_list, limit):

             event_results.add(event_id)

+            txn.execute(
+                connected_insertion_event_query, (event_id, limit - len(event_results))
+            )
+            connected_insertion_event_id_results = list(txn)
+            logger.info(
+                "connected_insertion_event_query %s",
+                connected_insertion_event_id_results,
+            )
+            for row in connected_insertion_event_id_results:
+                if row[1] not in event_results:
+                    queue.put((-row[0], row[1]))
+
+                # Find any chunk connections for the given insertion event
+                txn.execute(
+                    chunk_connection_query, (row[1], limit - len(event_results))
+                )
+                chunk_start_event_id_results = list(txn)
+                logger.info(
+                    "chunk_start_event_id_results %s",
+                    chunk_start_event_id_results,
+                )
+                for row in chunk_start_event_id_results:
+                    if row[1] not in event_results:
+                        queue.put((-row[0], row[1]))
+
             txn.execute(query, (event_id, False, limit - len(event_results)))
+            prev_event_id_results = list(txn)
+            logger.info("prev_event_ids %s", prev_event_id_results)

-            for row in txn:
+            for row in prev_event_id_results:
                 if row[1] not in event_results:
                     queue.put((-row[0], row[1]))
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 228dce91b0fd..6f9e91dafa94 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1506,6 +1506,7 @@ def _update_metadata_tables_txn(

         self._handle_insertion_event(txn, event)
         self._handle_marker_event(txn, event)
+        self._handle_chunk_id(txn, event)

         # Store the labels for this event.
         labels = event.content.get(EventContentFields.LABELS)
@@ -1773,10 +1774,27 @@ def _handle_insertion_event(self, txn, event):

         logger.info("_handle_insertion_event %s", event)

+        next_chunk_id = event.content.get(EventContentFields.MSC2716_NEXT_CHUNK_ID)
+        if next_chunk_id is None:
+            # Invalid insertion event without next chunk ID
+            return
+
+        # Keep track of the insertion event and the chunk ID
+        self.db_pool.simple_insert_txn(
+            txn,
+            table="insertion_events",
+            values={
+                "insertion_event_id": event.event_id,
+                "room_id": event.room_id,
+                "next_chunk_id": next_chunk_id,
+            },
+        )
+
+        # Insert an edge for every prev_event connection
         for prev_event_id in event.prev_events:
             self.db_pool.simple_insert_txn(
                 txn,
-                table="insertion_event_extremeties",
+                table="insertion_event_edges",
                 values={
                     "insertion_event_id": event.event_id,
                     "room_id": event.room_id,
                     "insertion_prev_event_id": prev_event_id,
                 },
             )
@@ -1798,26 +1816,55 @@ def _handle_marker_event(self, txn, event):

         logger.info("_handle_marker_event %s", event)

+        # TODO: We should attempt to backfill the insertion event instead
+        # of trying to pack all of the info in the marker event. Otherwise,
+        # we need to pack in the insertion_prev_events and insertion_next_chunk_id.
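+        # (A marker event on the live timeline is what advertises that an
+        # insertion event exists back in history, per the schema comment in
+        # 01insertion_event_lookups.sql.)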
+ + # insertion_event_id = event.content.get( + # EventContentFields.MSC2716_MARKER_INSERTION + # ) + # insertion_prev_event_ids = event.content.get( + # EventContentFields.MSC2716_MARKER_INSERTION_PREV_EVENTS + # ) + # if not insertion_event_id or not insertion_prev_event_ids: + # # Invalid marker event + # return + + # for prev_event_id in insertion_prev_event_ids: + # self.db_pool.simple_insert_txn( + # txn, + # table="insertion_event_edges", + # values={ + # "insertion_event_id": insertion_event_id, + # "room_id": event.room_id, + # "insertion_prev_event_id": prev_event_id, + # }, + # ) + + def _handle_chunk_id(self, txn, event): + """Handles inserting the chunk connections between the event at the + start of a chunk and an insertion event + + Args: txn event (EventBase) + """ + + chunk_id = event.content.get(EventContentFields.MSC2716_CHUNK_ID) + if chunk_id is None: + # No chunk connection to persist return - for prev_event_id in insertion_prev_event_ids: - self.db_pool.simple_insert_txn( - txn, - table="insertion_event_extremeties", - values={ - "insertion_event_id": insertion_event_id, - "room_id": event.room_id, - "insertion_prev_event_id": prev_event_id, - }, - ) + logger.info("_handle_chunk_id %s %s", chunk_id, event) + + # Keep track of the insertion event and the chunk ID + self.db_pool.simple_insert_txn( + txn, + table="chunk_edges", + values={ + "event_id": event.event_id, + "room_id": event.room_id, + "chunk_id": chunk_id, + }, + ) def _handle_redaction(self, txn, redacted_event_id): """Handles receiving a redaction and checking whether we need to remove diff --git a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql index 58b71f1bc411..88cb22ec7058 100644 --- a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql +++ b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql @@ -18,23 +18,33 @@ -- and we hit an event which matches `insertion_prev_event_id`, it should backfill -- the "insertion" event and start navigating from there. 
+CREATE TABLE IF NOT EXISTS insertion_events( + insertion_event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + next_chunk_id TEXT NOT NULL, + UNIQUE (insertion_event_id, room_id, next_chunk_id) +); + +CREATE INDEX IF NOT EXISTS insertion_events_insertion_room_id ON insertion_events(room_id); +CREATE INDEX IF NOT EXISTS insertion_events_insertion_event_id ON insertion_events(insertion_event_id); +CREATE INDEX IF NOT EXISTS insertion_events_next_chunk_id ON insertion_events(next_chunk_id); -CREATE TABLE IF NOT EXISTS insertion_event_extremeties( +CREATE TABLE IF NOT EXISTS insertion_event_edges( insertion_event_id TEXT NOT NULL, room_id TEXT NOT NULL, insertion_prev_event_id TEXT NOT NULL, - UNIQUE (insertion_event_id, room_id, room_id, insertion_prev_event_id) + UNIQUE (insertion_event_id, room_id, insertion_prev_event_id) ); -CREATE INDEX IF NOT EXISTS insertion_event_extremeties_insertion_room_id ON insertion_event_extremeties(room_id); -CREATE INDEX IF NOT EXISTS insertion_event_extremeties_insertion_event_id ON insertion_event_extremeties(insertion_event_id); -CREATE INDEX IF NOT EXISTS insertion_event_extremeties_insertion_prev_event_id ON insertion_event_extremeties(insertion_prev_event_id); +CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_room_id ON insertion_event_edges(room_id); +CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_event_id ON insertion_event_edges(insertion_event_id); +CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id); -CREATE TABLE IF NOT EXISTS chunk_connections( +CREATE TABLE IF NOT EXISTS chunk_edges( event_id TEXT NOT NULL, room_id TEXT NOT NULL, chunk_id TEXT NOT NULL, UNIQUE (event_id, room_id) ); -CREATE INDEX IF NOT EXISTS chunk_connections_insertion_chunk_id ON chunk_connections(chunk_id); +CREATE INDEX IF NOT EXISTS chunk_edges_chunk_id ON chunk_edges(chunk_id); From 02b1bea935f45b7954764889ef8534ea20ad24ce Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Jul 2021 14:34:03 -0500 Subject: [PATCH 17/53] Remove debug sql queries --- scripts-dev/complement.sh | 2 +- .../databases/main/event_federation.py | 27 +------------------ 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index a99dfef77631..da2cf6ae67a9 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_joining_on_federated_server +go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 1b65f4413917..5a4cc25132ad 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -673,41 +673,16 @@ async def get_oldest_events_with_depth_in_room(self, room_id): ) def get_oldest_events_with_depth_in_room_txn(self, txn, room_id): - sqlAsdf = "SELECT * FROM insertion_event_edges as i" - txn.execute(sqlAsdf) - logger.info("wfeafewawafeawg %s", dict(txn)) - - sqlAsdf = "SELECT * FROM insertion_event_edges as i WHERE i.room_id = ?" 
-        txn.execute(sqlAsdf, (room_id,))
-        logger.info("awfeawefw %s", dict(txn))
-
-        sql = (
-            "SELECT i.insertion_event_id, MAX(e.depth) FROM events as e"
-            # " INNER JOIN event_edges as g"
-            # " ON g.event_id = e.event_id"
-            # " INNER JOIN event_backward_extremities as b"
-            # " ON g.prev_event_id = b.event_id"
-            # TODO
-            " INNER JOIN insertion_event_edges as i"
-            " ON e.event_id = i.insertion_prev_event_id"
-            " WHERE i.room_id = ?"
-            " GROUP BY i.insertion_event_id"
-        )
-
-        txn.execute(sql, (room_id,))
-
         sql = (
             "SELECT b.event_id, MAX(e.depth) FROM events as e"
             " INNER JOIN event_edges as g"
             " ON g.event_id = e.event_id"
             " INNER JOIN event_backward_extremities as b"
             " ON g.prev_event_id = b.event_id"
-            # TODO
-            # " INNER JOIN insertion_event_edges as i"
-            # " ON g.event_id = i.insertion_prev_event_id"
             " WHERE b.room_id = ? AND g.is_state is ?"
             " GROUP BY b.event_id"
         )
+
         txn.execute(sql, (room_id, False))

         return dict(txn)

From ab8011bb5d10ff2e6418702c210e1a3ab3855eb3 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 14 Jul 2021 01:24:13 -0500
Subject: [PATCH 18/53] Some marker event implementation trials

---
 scripts-dev/complement.sh                |  2 +-
 synapse/api/constants.py                 |  3 ---
 synapse/storage/databases/main/events.py | 18 ++++++++++++++++--
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index da2cf6ae67a9..9d08c154da7f 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then
 fi

 # Run the tests!
-go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory
+go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_already_joined_on_federated_server
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 8363c2bb0f5f..78364100e153 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -196,9 +196,6 @@ class EventContentFields:
     MSC2716_CHUNK_ID = "org.matrix.msc2716.chunk_id"
     # For "marker" events
     MSC2716_MARKER_INSERTION = "org.matrix.msc2716.marker.insertion"
-    MSC2716_MARKER_INSERTION_PREV_EVENTS = (
-        "org.matrix.msc2716.marker.insertion_prev_events"
-    )


 class RoomTypes:
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 75d01464e060..107d8ad38d8c 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1819,10 +1819,24 @@ def _handle_marker_event(self, txn, event):
         # TODO: We should attempt to backfill the insertion event instead
         # of trying to pack all of the info in the marker event. Otherwise,
         # we need to pack in the insertion_prev_events and insertion_next_chunk_id.
+ # GET /_matrix/federation/v1/event/{eventId} - # insertion_event_id = event.content.get( - # EventContentFields.MSC2716_MARKER_INSERTION + insertion_event_id = event.content.get( + EventContentFields.MSC2716_MARKER_INSERTION + ) + + # We will trust that the application service sending the marker event is + # also the one that knows about the insertion event + # insertion_event_origin = get_domain_from_id(event.sender) + # m_ev = await self.federation_client.get_event( + # [insertion_event_origin], + # insertion_event_id, + # outlier=True, + # timeout=10000, # ) + # _auth_and_persist_events + # handle_new_client_event + # insertion_prev_event_ids = event.content.get( # EventContentFields.MSC2716_MARKER_INSERTION_PREV_EVENTS # ) From f20ba0264cdf665cd1f3a7a8f4087b3fbe91273e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 01:30:59 -0500 Subject: [PATCH 19/53] Clean up PR --- scripts-dev/complement.sh | 2 +- synapse/events/utils.py | 14 ++-- synapse/handlers/federation.py | 14 ---- .../databases/main/event_federation.py | 12 ++-- synapse/storage/databases/main/events.py | 70 +++---------------- 5 files changed, 24 insertions(+), 88 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 9d08c154da7f..aca32edc176e 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_already_joined_on_federated_server +go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 9c115758e9b4..ec96999e4e77 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -253,13 +253,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - # "auth_events", - # "prev_events", - # "hashes", - # "signatures", - # "depth", - # "origin", - # "prev_state", + "auth_events", + "prev_events", + "hashes", + "signatures", + "depth", + "origin", + "prev_state", ) for key in drop_keys: d.pop(key, None) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 585fb6f5284f..991ec9919a95 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1054,14 +1054,10 @@ async def maybe_backfill( with (await self._room_backfill.queue(room_id)): return await self._maybe_backfill_inner(room_id, current_depth, limit) - # Todo async def _maybe_backfill_inner( self, room_id: str, current_depth: int, limit: int ) -> bool: extremities = await self.store.get_oldest_events_with_depth_in_room(room_id) - logger.info( - "_maybe_backfill_inner extremities(%d)=%s", len(extremities), extremities - ) if not extremities: logger.debug("Not backfilling as no extremeties found.") @@ -2127,18 +2123,8 @@ async def on_backfill_request( limit = min(limit, 100) events = await self.store.get_backfill_events(room_id, pdu_list, limit) - logger.info( - "on_backfill_request get_backfill_events events(%d)=%s", - len(events), - [f'{ev.content.get("body")}: {ev.type} ({ev.event_id})' for ev in events], - ) events = await filter_events_for_server(self.storage, origin, events) - logger.info( - "on_backfill_request filter_events_for_server events(%d)=%s", - len(events), - events, - ) return events diff --git a/synapse/storage/databases/main/event_federation.py 
b/synapse/storage/databases/main/event_federation.py index 7e9031f5a9d6..b2ba63c016ab 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1009,8 +1009,8 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event_query, (event_id, limit - len(event_results)) ) connected_insertion_event_id_results = list(txn) - logger.info( - "connected_insertion_event_query %s", + logger.debug( + "_get_backfill_events: connected_insertion_event_query %s", connected_insertion_event_id_results, ) for row in connected_insertion_event_id_results: @@ -1022,8 +1022,8 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): chunk_connection_query, (row[1], limit - len(event_results)) ) chunk_start_event_id_results = list(txn) - logger.info( - "chunk_start_event_id_results %s", + logger.debug( + "_get_backfill_events: chunk_start_event_id_results %s", chunk_start_event_id_results, ) for row in chunk_start_event_id_results: @@ -1032,7 +1032,9 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): txn.execute(query, (event_id, False, limit - len(event_results))) prev_event_id_results = list(txn) - logger.info("prev_event_ids %s", prev_event_id_results) + logger.debug( + "_get_backfill_events: prev_event_ids %s", prev_event_id_results + ) for row in prev_event_id_results: if row[1] not in event_results: diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 107d8ad38d8c..b743876dff14 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1505,7 +1505,6 @@ def _update_metadata_tables_txn( self._handle_event_relations(txn, event) self._handle_insertion_event(txn, event) - self._handle_marker_event(txn, event) self._handle_chunk_id(txn, event) # Store the labels for this event. @@ -1760,19 +1759,19 @@ def _handle_event_relations(self, txn, event): if rel_type == RelationTypes.REPLACE: txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,)) - def _handle_insertion_event(self, txn, event): + def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): """Handles inserting insertion extremeties during peristence of marker events Args: - txn - event (EventBase) + txn: The database transaction object + event: The event to process """ if event.type != EventTypes.MSC2716_INSERTION: # Not a insertion event return - logger.info("_handle_insertion_event %s", event) + logger.debug("_handle_insertion_event %s", event) next_chunk_id = event.content.get(EventContentFields.MSC2716_NEXT_CHUNK_ID) if next_chunk_id is None: @@ -1802,64 +1801,13 @@ def _handle_insertion_event(self, txn, event): }, ) - def _handle_marker_event(self, txn, event): - """Handles inserting insertion extremeties during peristence of marker events - - Args: - txn - event (EventBase) - """ - - if event.type != EventTypes.MSC2716_MARKER: - # Not a marker event - return - - logger.info("_handle_marker_event %s", event) - - # TODO: We should attempt to backfill the insertion event instead - # of trying to pack all of the info in the marker event. Otherwise, - # we need to pack in the insertion_prev_events and insertion_next_chunk_id. 
- # GET /_matrix/federation/v1/event/{eventId} - - insertion_event_id = event.content.get( - EventContentFields.MSC2716_MARKER_INSERTION - ) - - # We will trust that the application service sending the marker event is - # also the one that knows about the insertion event - # insertion_event_origin = get_domain_from_id(event.sender) - # m_ev = await self.federation_client.get_event( - # [insertion_event_origin], - # insertion_event_id, - # outlier=True, - # timeout=10000, - # ) - # _auth_and_persist_events - # handle_new_client_event - - # insertion_prev_event_ids = event.content.get( - # EventContentFields.MSC2716_MARKER_INSERTION_PREV_EVENTS - # ) - # if not insertion_event_id or not insertion_prev_event_ids: - # # Invalid marker event - # return - - # for prev_event_id in insertion_prev_event_ids: - # self.db_pool.simple_insert_txn( - # txn, - # table="insertion_event_edges", - # values={ - # "insertion_event_id": insertion_event_id, - # "room_id": event.room_id, - # "insertion_prev_event_id": prev_event_id, - # }, - # ) - - def _handle_chunk_id(self, txn, event): + def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): """Handles inserting the chunk connections between the event at the start of a chunk and an insertion event - Args: txn event (EventBase) + Args: + txn: The database transaction object + event: The event to process """ chunk_id = event.content.get(EventContentFields.MSC2716_CHUNK_ID) @@ -1867,7 +1815,7 @@ def _handle_chunk_id(self, txn, event): # No chunk connection to persist return - logger.info("_handle_chunk_id %s %s", chunk_id, event) + logger.debug("_handle_chunk_id %s %s", chunk_id, event) # Keep track of the insertion event and the chunk ID self.db_pool.simple_insert_txn( From 64aeb7330dcb8ad109004545af0496ec74f7a0de Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 01:42:51 -0500 Subject: [PATCH 20/53] Rename insertion_event_id to just event_id --- synapse/storage/databases/main/event_federation.py | 6 +++--- synapse/storage/databases/main/events.py | 4 ++-- .../main/delta/61/01insertion_event_lookups.sql | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b2ba63c016ab..363b9243652b 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -951,10 +951,10 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # Look for the "insertion" events connected to the given event_id # TODO: Do we need to worry about selecting only from the given room_id? The other query above doesn't connected_insertion_event_query = ( - "SELECT e.depth, i.insertion_event_id FROM insertion_event_edges AS i" + "SELECT e.depth, i.event_id FROM insertion_event_edges AS i" # Get the depth of the insertion event from the events table " INNER JOIN events AS e" - " ON e.event_id = i.insertion_event_id" + " ON e.event_id = i.event_id" # Find an insertion event which points via prev_events to the given event_id " WHERE i.insertion_prev_event_id = ?" " LIMIT ?" @@ -971,7 +971,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): " INNER JOIN events AS e" " ON e.event_id = c.event_id" # Find an insertion event which matches the given event_id - " WHERE i.insertion_event_id = ?" + " WHERE i.event_id = ?" " LIMIT ?" 
) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b743876dff14..db73b0e01f22 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1783,7 +1783,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): txn, table="insertion_events", values={ - "insertion_event_id": event.event_id, + "event_id": event.event_id, "room_id": event.room_id, "next_chunk_id": next_chunk_id, }, @@ -1795,7 +1795,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): txn, table="insertion_event_edges", values={ - "insertion_event_id": event.event_id, + "event_id": event.event_id, "room_id": event.room_id, "insertion_prev_event_id": prev_event_id, }, diff --git a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql index 88cb22ec7058..e908ea439082 100644 --- a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql +++ b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql @@ -19,25 +19,25 @@ -- the "insertion" event and start navigating from there. CREATE TABLE IF NOT EXISTS insertion_events( - insertion_event_id TEXT NOT NULL, + event_id TEXT NOT NULL, room_id TEXT NOT NULL, next_chunk_id TEXT NOT NULL, - UNIQUE (insertion_event_id, room_id, next_chunk_id) + UNIQUE (event_id, room_id, next_chunk_id) ); CREATE INDEX IF NOT EXISTS insertion_events_insertion_room_id ON insertion_events(room_id); -CREATE INDEX IF NOT EXISTS insertion_events_insertion_event_id ON insertion_events(insertion_event_id); +CREATE INDEX IF NOT EXISTS insertion_events_event_id ON insertion_events(event_id); CREATE INDEX IF NOT EXISTS insertion_events_next_chunk_id ON insertion_events(next_chunk_id); CREATE TABLE IF NOT EXISTS insertion_event_edges( - insertion_event_id TEXT NOT NULL, + event_id TEXT NOT NULL, room_id TEXT NOT NULL, insertion_prev_event_id TEXT NOT NULL, - UNIQUE (insertion_event_id, room_id, insertion_prev_event_id) + UNIQUE (event_id, room_id, insertion_prev_event_id) ); CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_room_id ON insertion_event_edges(room_id); -CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_event_id ON insertion_event_edges(insertion_event_id); +CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id); CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id); CREATE TABLE IF NOT EXISTS chunk_edges( From ea7c30db88dd8e20ed85e05aa8101f8c09672c18 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 01:54:31 -0500 Subject: [PATCH 21/53] Add some better sql comments --- .../main/delta/61/01insertion_event_lookups.sql | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql index e908ea439082..c0a380531414 100644 --- a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql +++ b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql @@ -13,11 +13,8 @@ * limitations under the License. */ --- Add a table that keeps track of "insertion" events back in the history --- when we get a "marker" event over the "live" timeline. 
When navigating the DAG
--- and we hit an event which matches `insertion_prev_event_id`, it should backfill
--- the "insertion" event and start navigating from there.
-
+-- Add a table that keeps track of "insertion" events and
+-- their next_chunk_id's so we can navigate to the next chunk of history.
 CREATE TABLE IF NOT EXISTS insertion_events(
     event_id TEXT NOT NULL,
     room_id TEXT NOT NULL,
@@ -29,6 +26,10 @@ CREATE INDEX IF NOT EXISTS insertion_events_insertion_room_id ON insertion_event
 CREATE INDEX IF NOT EXISTS insertion_events_event_id ON insertion_events(event_id);
 CREATE INDEX IF NOT EXISTS insertion_events_next_chunk_id ON insertion_events(next_chunk_id);

+-- Add a table that keeps track of all of the events we are inserting between.
+-- We use this when navigating the DAG and when we hit an event which matches
+-- `insertion_prev_event_id`, it should backfill from the "insertion" event and
+-- navigate the historical messages from there.
 CREATE TABLE IF NOT EXISTS insertion_event_edges(
     event_id TEXT NOT NULL,
     room_id TEXT NOT NULL,
@@ -40,6 +41,8 @@ CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_room_id ON insertion_
 CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id);
 CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id);

+-- Add a table that keeps track of how each chunk is labeled. The chunks are
+-- connected together based on the insertion events' `next_chunk_id`.
 CREATE TABLE IF NOT EXISTS chunk_edges(
     event_id TEXT NOT NULL,
     room_id TEXT NOT NULL,

From 9a6fd3fd46026c4c2f6f897cbf6d8f9aa49118d6 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 14 Jul 2021 01:57:47 -0500
Subject: [PATCH 22/53] More accurate description

---
 synapse/storage/databases/main/events.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index db73b0e01f22..740355a94590 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1760,7 +1760,7 @@ def _handle_event_relations(self, txn, event):
             txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,))

     def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase):
-        """Handles inserting insertion extremeties during peristence of marker events
+        """Handles keeping track of insertion events and edges/connections

         Args:
             txn: The database transaction object
@@ -1802,7 +1802,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase):
         )

     def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase):
-        """Handles inserting the chunk connections between the event at the
+        """Handles inserting the chunk edges/connections between the event at the
         start of a chunk and an insertion event

         Args:

From 0f6179fa673621fc64f67e67643269717e07ad4b Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 14 Jul 2021 02:02:27 -0500
Subject: [PATCH 23/53] Add changelog

---
 changelog.d/10245.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/10245.feature

diff --git a/changelog.d/10245.feature b/changelog.d/10245.feature
new file mode 100644
index 000000000000..41a684347c7a
--- /dev/null
+++ b/changelog.d/10245.feature
@@ -0,0 +1 @@
+Make historical events discoverable from backfill for servers without any scrollback history.
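An aside on the traversal these patches build up: `_get_backfill_events` now walks backwards from a set of starting events with three lookups per step (prev_event edges, insertion-event edges, and chunk edges), all feeding a single depth-ordered priority queue. The sketch below restates just that loop in plain Python; `fetch_prev_rows` is a hypothetical stand-in for those three SQL queries, not a real Synapse helper.

    from queue import PriorityQueue

    def walk_backfill(seed_events, fetch_prev_rows, limit):
        # seed_events: iterable of (depth, event_id) pairs to start from.
        # fetch_prev_rows(event_id): returns (depth, prev_event_id) rows; it
        # stands in for the prev_event, insertion-edge and chunk-edge queries.
        queue = PriorityQueue()
        event_results = set()
        for depth, event_id in seed_events:
            # Negate the depth: PriorityQueue retrieves the lowest value first,
            # so the newest-in-time (highest depth) events are processed first
            # while walking backwards in time.
            queue.put((-depth, event_id))
        while not queue.empty() and len(event_results) < limit:
            _, event_id = queue.get()
            if event_id in event_results:
                continue
            event_results.add(event_id)
            for depth, prev_event_id in fetch_prev_rows(event_id):
                if prev_event_id not in event_results:
                    queue.put((-depth, prev_event_id))
        return event_results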
From 5970e3fdc702d1d85ba681391990075eafeb908f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 02:26:40 -0500 Subject: [PATCH 24/53] Make it clear what MSC the change is part of --- changelog.d/10245.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/10245.feature b/changelog.d/10245.feature index 41a684347c7a..b3c48cc2cc25 100644 --- a/changelog.d/10245.feature +++ b/changelog.d/10245.feature @@ -1 +1 @@ -Make historical events discoverable from backfill for servers without any scrollback history. +Make historical events discoverable from backfill for servers without any scrollback history (part of MSC2716). From bc133969e41ac2a6de97e85b32c02b88668043d0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 02:26:54 -0500 Subject: [PATCH 25/53] Add more detail on which insertion event came through --- synapse/storage/databases/main/events.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 740355a94590..2baa7786a7fb 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1771,13 +1771,15 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Not a insertion event return - logger.debug("_handle_insertion_event %s", event) - next_chunk_id = event.content.get(EventContentFields.MSC2716_NEXT_CHUNK_ID) if next_chunk_id is None: # Invalid insertion event without next chunk ID return + logger.debug( + "_handle_insertion_event (next_chunk_id=%s) %s", next_chunk_id, event + ) + # Keep track of the insertion event and the chunk ID self.db_pool.simple_insert_txn( txn, From 669da52ae2eff512fb6c5eaf2354b18db29db351 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 14:49:15 -0500 Subject: [PATCH 26/53] Address review and improve sql queries --- .../databases/main/event_federation.py | 69 +++++++++---------- synapse/storage/databases/main/events.py | 5 +- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 363b9243652b..6c0c8fb12e37 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -937,43 +937,40 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # search. # Look for the prev_event_id connected to the given event_id - query = ( - "SELECT depth, prev_event_id FROM event_edges" - # Get the depth of the prev_event_id from the events table - " INNER JOIN events" - " ON prev_event_id = events.event_id" - # Find an event which matches the given event_id - " WHERE event_edges.event_id = ?" - " AND event_edges.is_state = ?" - " LIMIT ?" - ) + query = """ + SELECT depth, prev_event_id FROM event_edges + /* Get the depth of the prev_event_id from the events table */ + INNER JOIN events + ON prev_event_id = events.event_id + /* Find an event which matches the given event_id */ + WHERE event_edges.event_id = ? + AND event_edges.is_state = ? + LIMIT ? + """ # Look for the "insertion" events connected to the given event_id - # TODO: Do we need to worry about selecting only from the given room_id? 
The other query above doesn't - connected_insertion_event_query = ( - "SELECT e.depth, i.event_id FROM insertion_event_edges AS i" - # Get the depth of the insertion event from the events table - " INNER JOIN events AS e" - " ON e.event_id = i.event_id" - # Find an insertion event which points via prev_events to the given event_id - " WHERE i.insertion_prev_event_id = ?" - " LIMIT ?" - ) + connected_insertion_event_query = """ + SELECT e.depth, i.event_id FROM insertion_event_edges AS i + /* Get the depth of the insertion event from the events table */ + INNER JOIN events AS e USING (event_id) + /* Find an insertion event which points via prev_events to the given event_id */ + WHERE i.insertion_prev_event_id = ? + LIMIT ? + """ # Find any chunk connections of a given insertion event - # TODO: Do we need to worry about selecting only from the given room_id? The other query above doesn't - chunk_connection_query = ( - "SELECT e.depth, c.event_id FROM insertion_events AS i" - # Find the chunk that connects to the given insertion event - " INNER JOIN chunk_edges AS c" - " ON i.next_chunk_id = c.chunk_id" - # Get the depth of the chunk start event from the events table - " INNER JOIN events AS e" - " ON e.event_id = c.event_id" - # Find an insertion event which matches the given event_id - " WHERE i.event_id = ?" - " LIMIT ?" - ) + chunk_connection_query = """ + SELECT e.depth, c.event_id FROM insertion_events AS i + /* Find the chunk that connects to the given insertion event */ + INNER JOIN chunk_edges AS c + ON i.next_chunk_id = c.chunk_id + /* Get the depth of the chunk start event from the events table */ + INNER JOIN events AS e + ON e.event_id = c.event_id + /* Find an insertion event which matches the given event_id */ + WHERE i.event_id = ? + LIMIT ? + """ # In a PriorityQueue, the lowest valued entries are retrieved first. # We're using depth as the priority in the queue. 
@@ -1008,7 +1005,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): txn.execute( connected_insertion_event_query, (event_id, limit - len(event_results)) ) - connected_insertion_event_id_results = list(txn) + connected_insertion_event_id_results = txn.fetchall() logger.debug( "_get_backfill_events: connected_insertion_event_query %s", connected_insertion_event_id_results, @@ -1021,7 +1018,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): txn.execute( chunk_connection_query, (row[1], limit - len(event_results)) ) - chunk_start_event_id_results = list(txn) + chunk_start_event_id_results = txn.fetchall() logger.debug( "_get_backfill_events: chunk_start_event_id_results %s", chunk_start_event_id_results, @@ -1031,7 +1028,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): queue.put((-row[0], row[1])) txn.execute(query, (event_id, False, limit - len(event_results))) - prev_event_id_results = list(txn) + prev_event_id_results = txn.fetchall() logger.debug( "_get_backfill_events: prev_event_ids %s", prev_event_id_results ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 2baa7786a7fb..081d79f1acf2 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1760,7 +1760,8 @@ def _handle_event_relations(self, txn, event): txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,)) def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): - """Handles keeping track of insertion events and edges/connections + """Handles keeping track of insertion events and edges/connections. + Part of MSC2716. Args: txn: The database transaction object @@ -1805,7 +1806,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): """Handles inserting the chunk edges/connections between the event at the - start of a chunk and an insertion event + start of a chunk and an insertion event. Part of MSC2716. 
Args: txn: The database transaction object From 9a86e0538fb1b4189e8b3c396be75f2a8a58e60a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 15:04:59 -0500 Subject: [PATCH 27/53] Only use event_id as unique constraint --- .../schema/main/delta/61/01insertion_event_lookups.sql | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql index c0a380531414..9f00b037a426 100644 --- a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql +++ b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql @@ -19,10 +19,9 @@ CREATE TABLE IF NOT EXISTS insertion_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, next_chunk_id TEXT NOT NULL, - UNIQUE (event_id, room_id, next_chunk_id) + UNIQUE (event_id) ); -CREATE INDEX IF NOT EXISTS insertion_events_insertion_room_id ON insertion_events(room_id); CREATE INDEX IF NOT EXISTS insertion_events_event_id ON insertion_events(event_id); CREATE INDEX IF NOT EXISTS insertion_events_next_chunk_id ON insertion_events(next_chunk_id); @@ -34,7 +33,7 @@ CREATE TABLE IF NOT EXISTS insertion_event_edges( event_id TEXT NOT NULL, room_id TEXT NOT NULL, insertion_prev_event_id TEXT NOT NULL, - UNIQUE (event_id, room_id, insertion_prev_event_id) + UNIQUE (event_id) ); CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_room_id ON insertion_event_edges(room_id); @@ -47,7 +46,7 @@ CREATE TABLE IF NOT EXISTS chunk_edges( event_id TEXT NOT NULL, room_id TEXT NOT NULL, chunk_id TEXT NOT NULL, - UNIQUE (event_id, room_id) + UNIQUE (event_id) ); CREATE INDEX IF NOT EXISTS chunk_edges_chunk_id ON chunk_edges(chunk_id); From 8999567de4ad983241ad472c137e2705d0517cc2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 23:29:45 -0500 Subject: [PATCH 28/53] Fix test case where insertion event is already in the normal DAG --- scripts-dev/complement.sh | 2 +- synapse/events/utils.py | 14 +++---- .../databases/main/event_federation.py | 37 ++++++++++--------- synapse/storage/databases/main/events.py | 4 +- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index aca32edc176e..fd0206e4962b 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! 
-go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests +go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_joining_on_federated_server_-_pre-made_insertion_event diff --git a/synapse/events/utils.py b/synapse/events/utils.py index ec96999e4e77..9c115758e9b4 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -253,13 +253,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - "auth_events", - "prev_events", - "hashes", - "signatures", - "depth", - "origin", - "prev_state", + # "auth_events", + # "prev_events", + # "hashes", + # "signatures", + # "depth", + # "origin", + # "prev_state", ) for key in drop_keys: d.pop(key, None) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 6c0c8fb12e37..a2460fb7ca2c 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -965,8 +965,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): INNER JOIN chunk_edges AS c ON i.next_chunk_id = c.chunk_id /* Get the depth of the chunk start event from the events table */ - INNER JOIN events AS e - ON e.event_id = c.event_id + INNER JOIN events AS e USING (event_id) /* Find an insertion event which matches the given event_id */ WHERE i.event_id = ? LIMIT ? @@ -1006,30 +1005,32 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event_query, (event_id, limit - len(event_results)) ) connected_insertion_event_id_results = txn.fetchall() - logger.debug( + logger.info( "_get_backfill_events: connected_insertion_event_query %s", connected_insertion_event_id_results, ) for row in connected_insertion_event_id_results: - if row[1] not in event_results: - queue.put((-row[0], row[1])) + connected_insertion_event_depth = row[0] + connected_insertion_event = row[1] + queue.put((-connected_insertion_event_depth, connected_insertion_event)) - # Find any chunk connections for the given insertion event - txn.execute( - chunk_connection_query, (row[1], limit - len(event_results)) - ) - chunk_start_event_id_results = txn.fetchall() - logger.debug( - "_get_backfill_events: chunk_start_event_id_results %s", - chunk_start_event_id_results, - ) - for row in chunk_start_event_id_results: - if row[1] not in event_results: - queue.put((-row[0], row[1])) + # Find any chunk connections for the given insertion event + txn.execute( + chunk_connection_query, + (connected_insertion_event, limit - len(event_results)), + ) + chunk_start_event_id_results = txn.fetchall() + logger.info( + "_get_backfill_events: chunk_start_event_id_results %s", + chunk_start_event_id_results, + ) + for row in chunk_start_event_id_results: + if row[1] not in event_results: + queue.put((-row[0], row[1])) txn.execute(query, (event_id, False, limit - len(event_results))) prev_event_id_results = txn.fetchall() - logger.debug( + logger.info( "_get_backfill_events: prev_event_ids %s", prev_event_id_results ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 081d79f1acf2..201301440f7e 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1777,7 +1777,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Invalid insertion 
event without next chunk ID return - logger.debug( + logger.info( "_handle_insertion_event (next_chunk_id=%s) %s", next_chunk_id, event ) @@ -1818,7 +1818,7 @@ def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): # No chunk connection to persist return - logger.debug("_handle_chunk_id %s %s", chunk_id, event) + logger.info("_handle_chunk_id %s %s", chunk_id, event) # Keep track of the insertion event and the chunk ID self.db_pool.simple_insert_txn( From 35a4569ce6e42a8a7917a9b9979e8f94a72ce19a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Jul 2021 23:32:24 -0500 Subject: [PATCH 29/53] Remove debug changes --- scripts-dev/complement.sh | 2 +- synapse/events/utils.py | 14 +++++++------- synapse/storage/databases/main/event_federation.py | 6 +++--- synapse/storage/databases/main/events.py | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index fd0206e4962b..aca32edc176e 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_joining_on_federated_server_-_pre-made_insertion_event +go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 9c115758e9b4..ec96999e4e77 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -253,13 +253,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - # "auth_events", - # "prev_events", - # "hashes", - # "signatures", - # "depth", - # "origin", - # "prev_state", + "auth_events", + "prev_events", + "hashes", + "signatures", + "depth", + "origin", + "prev_state", ) for key in drop_keys: d.pop(key, None) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index a2460fb7ca2c..37b99c7f8111 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1005,7 +1005,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event_query, (event_id, limit - len(event_results)) ) connected_insertion_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: connected_insertion_event_query %s", connected_insertion_event_id_results, ) @@ -1020,7 +1020,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): (connected_insertion_event, limit - len(event_results)), ) chunk_start_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: chunk_start_event_id_results %s", chunk_start_event_id_results, ) @@ -1030,7 +1030,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): txn.execute(query, (event_id, False, limit - len(event_results))) prev_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: prev_event_ids %s", prev_event_id_results ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 201301440f7e..081d79f1acf2 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1777,7 +1777,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Invalid 
insertion event without next chunk ID return - logger.info( + logger.debug( "_handle_insertion_event (next_chunk_id=%s) %s", next_chunk_id, event ) @@ -1818,7 +1818,7 @@ def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): # No chunk connection to persist return - logger.info("_handle_chunk_id %s %s", chunk_id, event) + logger.debug("_handle_chunk_id %s %s", chunk_id, event) # Keep track of the insertion event and the chunk ID self.db_pool.simple_insert_txn( From 164e32b1d2b893a2927028ff592298f32a7eeb32 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 16 Jul 2021 14:46:04 -0500 Subject: [PATCH 30/53] Add support for MSC2716 marker events --- scripts-dev/complement.sh | 2 +- synapse/storage/databases/main/events.py | 46 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index aca32edc176e..9d08c154da7f 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests +go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Historical_messages_are_visible_when_already_joined_on_federated_server diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 081d79f1acf2..ec70c5b925c1 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1506,6 +1506,7 @@ def _update_metadata_tables_txn( self._handle_insertion_event(txn, event) self._handle_chunk_id(txn, event) + self._handle_marker_event(txn, event) # Store the labels for this event. labels = event.content.get(EventContentFields.LABELS) @@ -1831,6 +1832,51 @@ def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): }, ) + def _handle_marker_event(self, txn, event): + """Handles backfilling the insertion event when we receive a marker + event that points to one + + Args: + txn: The database transaction object + event: The event to process + """ + + if event.type != EventTypes.MSC2716_MARKER: + # Not a marker event + return + + logger.info("_handle_marker_event %s", event) + + # TODO: We should attempt to backfill the insertion event instead + # of trying to pack all of the info in the marker event. Otherwise, + # we need to pack in the insertion_prev_events and insertion_next_chunk_id. 
+        # GET /_matrix/federation/v1/event/{eventId}
+
+        insertion_event_id = event.content.get(
+            EventContentFields.MSC2716_MARKER_INSERTION
+        )
+
+        async def backfill_insertion_event():
+            logger.info("marker -> backfill_insertion_event")
+            # We will trust that the application service sending the marker event is
+            # also the one that knows about the insertion event
+            insertion_event_origin = get_domain_from_id(event.sender)
+            insertion_event = await self.federation_client.get_event(
+                [insertion_event_origin],
+                insertion_event_id,
+                outlier=True,
+                timeout=10000,
+            )
+            logger.info("marker -> fetched insertion_event %s", insertion_event)
+            # _auth_and_persist_events
+            # handle_new_client_event
+
+        # We don't need to do any processing for a marker event coming from the same homeserver
+        if self.hs.is_mine_id(event.sender):
+            # TODO: "Note that simply calling a coroutine will not schedule it to be executed"
+            # https://docs.python.org/3/library/asyncio-task.html
+            backfill_insertion_event()
+
     def _handle_redaction(self, txn, redacted_event_id):
         """Handles receiving a redaction and checking whether we need to remove
         any redacted relations from the database.

From 435f074541a15d18b751b6a3cbf537f89d2eb19e Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Fri, 16 Jul 2021 17:24:18 -0500
Subject: [PATCH 31/53] Process markers when we receive them over federation

---
 synapse/handlers/federation.py                     | 53 +++++++++++++++++++
 .../databases/main/event_federation.py             |  6 +--
 synapse/storage/databases/main/events.py           | 50 +----
 3 files changed, 58 insertions(+), 51 deletions(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 991ec9919a95..41ecd8cebca1 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -42,6 +42,7 @@
 from synapse import event_auth
 from synapse.api.constants import (
+    EventContentFields,
     EventTypes,
     Membership,
     RejectedReason,
@@ -263,6 +264,7 @@ async def on_receive_pdu(
         state = None

         # Get missing pdus if necessary.
+        # We don't need to worry about outliers because TODO!
         if not pdu.internal_metadata.is_outlier():
             # We only backfill backwards to the min depth.
             min_depth = await self.get_min_depth_for_context(pdu.room_id)
@@ -889,6 +891,57 @@ async def _process_received_pdu(
                 "resync_device_due_to_pdu", self._resync_device, event.sender
             )

+        await self._handle_marker_event(origin, event)
+
+    async def _handle_marker_event(self, origin: str, marker_event: EventBase):
+        """Handles backfilling the insertion event when we receive a marker
+        event that points to one
+
+        Args:
+            origin: Origin of the event. Will be queried to get the insertion event
+            marker_event: The marker event to process
+        """
+
+        if marker_event.type != EventTypes.MSC2716_MARKER:
+            # Not a marker event
+            return
+
+        logger.info("_handle_marker_event: received %s", marker_event)
+
+        insertion_event_id = marker_event.content.get(
+            EventContentFields.MSC2716_MARKER_INSERTION
+        )
+
+        if insertion_event_id is None:
+            # Nothing to retrieve then (invalid marker)
+            return
+
+        logger.info(
+            "_handle_marker_event: backfilling insertion event %s", insertion_event_id
+        )
+
+        await self._get_events_and_persist(
+            origin,
+            marker_event.room_id,
+            [insertion_event_id],
+        )
+
+        insertion_event = await self.store.get_event(insertion_event_id, allow_none=True)
+        if insertion_event is None:
+            logger.warning(
+                "_handle_marker_event: server %s didn't return insertion event %s for marker %s",
+                origin,
+                insertion_event_id,
+                marker_event.event_id,
+            )
+            return
+
+        logger.info(
+            "_handle_marker_event: Successfully backfilled insertion event %s from marker event %s",
+            insertion_event,
+            marker_event,
+        )
+
     async def _resync_device(self, sender: str) -> None:
         """We have detected that the device list for the given user may be out
         of sync, so we try and resync them.

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 37b99c7f8111..a2460fb7ca2c 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1005,7 +1005,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit):
             connected_insertion_event_query, (event_id, limit - len(event_results))
         )
         connected_insertion_event_id_results = txn.fetchall()
-        logger.debug(
+        logger.info(
             "_get_backfill_events: connected_insertion_event_query %s",
             connected_insertion_event_id_results,
         )
@@ -1020,7 +1020,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit):
                 (connected_insertion_event, limit - len(event_results)),
             )
             chunk_start_event_id_results = txn.fetchall()
-            logger.debug(
+            logger.info(
                 "_get_backfill_events: chunk_start_event_id_results %s",
                 chunk_start_event_id_results,
             )
@@ -1030,7 +1030,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit):
         txn.execute(query, (event_id, False, limit - len(event_results)))
         prev_event_id_results = txn.fetchall()
-        logger.debug(
+        logger.info(
             "_get_backfill_events: prev_event_ids %s", prev_event_id_results
         )

diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index ec70c5b925c1..201301440f7e 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1506,7 +1506,6 @@ def _update_metadata_tables_txn(
         self._handle_insertion_event(txn, event)
         self._handle_chunk_id(txn, event)
-        self._handle_marker_event(txn, event)

         # Store the labels for this event.
labels = event.content.get(EventContentFields.LABELS) @@ -1778,7 +1777,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Invalid insertion event without next chunk ID return - logger.debug( + logger.info( "_handle_insertion_event (next_chunk_id=%s) %s", next_chunk_id, event ) @@ -1819,7 +1818,7 @@ def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): # No chunk connection to persist return - logger.debug("_handle_chunk_id %s %s", chunk_id, event) + logger.info("_handle_chunk_id %s %s", chunk_id, event) # Keep track of the insertion event and the chunk ID self.db_pool.simple_insert_txn( @@ -1832,51 +1831,6 @@ def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): }, ) - def _handle_marker_event(self, txn, event): - """Handles backfilling the insertion event when we receive a marker - event that points to one - - Args: - txn: The database transaction object - event: The event to process - """ - - if event.type != EventTypes.MSC2716_MARKER: - # Not a marker event - return - - logger.info("_handle_marker_event %s", event) - - # TODO: We should attempt to backfill the insertion event instead - # of trying to pack all of the info in the marker event. Otherwise, - # we need to pack in the insertion_prev_events and insertion_next_chunk_id. - # GET /_matrix/federation/v1/event/{eventId} - - insertion_event_id = event.content.get( - EventContentFields.MSC2716_MARKER_INSERTION - ) - - async def backfill_insertion_event(): - logger.info("marker -> backfill_insertion_event") - # We will trust that the application service sending the marker event is - # also the one that knows about the insertion event - insertion_event_origin = get_domain_from_id(event.sender) - insertion_event = await self.federation_client.get_event( - [insertion_event_origin], - insertion_event_id, - outlier=True, - timeout=10000, - ) - logger.info("marker -> fetched insertion_event %s", insertion_event) - # _auth_and_persist_events - # handle_new_client_event - - # We don't need to do any processing for a marker event coming from the same homeserver - if self.hs.is_mine_id(event.sender): - # TODO: "Note that simply calling a coroutine will not schedule it to be executed" - # https://docs.python.org/3/library/asyncio-task.html - backfill_insertion_event() - def _handle_redaction(self, txn, redacted_event_id): """Handles receiving a redaction and checking whether we need to remove any redacted relations from the database. 
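The net effect of the patch above: marker handling moves out of the storage layer and into the federation handler, where it can actually fetch the referenced insertion event from the origin server. A condensed sketch of that flow, with logging and the warning path trimmed, so it reads as a summary rather than a drop-in replacement:

    async def _handle_marker_event(self, origin: str, marker_event: EventBase):
        if marker_event.type != EventTypes.MSC2716_MARKER:
            return  # not a marker event
        insertion_event_id = marker_event.content.get(
            EventContentFields.MSC2716_MARKER_INSERTION
        )
        if insertion_event_id is None:
            return  # invalid marker, nothing to retrieve
        # Ask the origin server for the insertion event and persist it locally.
        await self._get_events_and_persist(
            origin, marker_event.room_id, [insertion_event_id]
        )
        # Confirm it actually arrived before treating the backfill as done.
        insertion_event = await self.store.get_event(
            insertion_event_id, allow_none=True
        )
        if insertion_event is None:
            return  # the origin didn't return the insertion event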
From e0e1bd05505ee159c396a1749d90bfcbe06ada93 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 17 Jul 2021 03:00:10 -0500 Subject: [PATCH 32/53] WIP: make hs2 backfill historical messages after marker event --- synapse/handlers/federation.py | 24 +++++-- .../databases/main/event_federation.py | 65 +++++++++++++++---- synapse/storage/databases/main/events.py | 13 ++-- synapse/visibility.py | 13 +++- 4 files changed, 92 insertions(+), 23 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 41ecd8cebca1..c6b3a1bf3aa1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -926,7 +926,9 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): [insertion_event_id], ) - insertion_event = await self.store.get_event(insertion_event_id, allow_none=True) + insertion_event = await self.store.get_event( + insertion_event_id, allow_none=True + ) if insertion_event is None: logger.warning( "_handle_marker_event: server %s didn't return insertion event %s for marker %s", @@ -942,6 +944,10 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): marker_event, ) + await self.store.insert_backward_extremity( + insertion_event_id, marker_event.room_id + ) + async def _resync_device(self, sender: str) -> None: """We have detected that the device list for the given user may be out of sync, so we try and resync them. @@ -1110,7 +1116,12 @@ async def maybe_backfill( async def _maybe_backfill_inner( self, room_id: str, current_depth: int, limit: int ) -> bool: - extremities = await self.store.get_oldest_events_with_depth_in_room(room_id) + oldest_events = await self.store.get_oldest_events_with_depth_in_room(room_id) + insertion_events_to_be_backfilled = ( + await self.store.get_insertion_event_backwards_extremities_in_room(room_id) + ) + extremities = {**oldest_events, **insertion_events_to_be_backfilled} + logger.info("_maybe_backfill_inner: extremities %s", extremities) if not extremities: logger.debug("Not backfilling as no extremeties found.") @@ -1143,12 +1154,14 @@ async def _maybe_backfill_inner( # types have. forward_events = await self.store.get_successor_events(list(extremities)) + logger.info("_maybe_backfill_inner: forward_events %s", forward_events) extremities_events = await self.store.get_events( forward_events, redact_behaviour=EventRedactBehaviour.AS_IS, get_prev_content=False, ) + logger.info("_maybe_backfill_inner: extremities_events %s", extremities_events) # We set `check_history_visibility_only` as we might otherwise get false # positives from users having been erased. @@ -1159,6 +1172,9 @@ async def _maybe_backfill_inner( redact=False, check_history_visibility_only=True, ) + logger.info( + "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities + ) if not filtered_extremities: return False @@ -1177,7 +1193,7 @@ async def _maybe_backfill_inner( # much larger factor will result in triggering a backfill request much # earlier than necessary. if current_depth - 2 * limit > max_depth: - logger.debug( + logger.info( "Not backfilling as we don't need to. 
%d < %d - 2 * %d", max_depth, current_depth, @@ -1185,7 +1201,7 @@ async def _maybe_backfill_inner( ) return False - logger.debug( + logger.info( "room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s", room_id, current_depth, diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index a2460fb7ca2c..8bbdb06c74e1 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -666,26 +666,48 @@ def _get_auth_chain_difference_txn( return {eid for eid, n in event_to_missing_sets.items() if n} async def get_oldest_events_with_depth_in_room(self, room_id): + def get_oldest_events_with_depth_in_room_txn(txn, room_id): + sql = ( + "SELECT b.event_id, MAX(e.depth) FROM events as e" + " INNER JOIN event_edges as g" + " ON g.event_id = e.event_id" + " INNER JOIN event_backward_extremities as b" + " ON g.prev_event_id = b.event_id" + " WHERE b.room_id = ? AND g.is_state is ?" + " GROUP BY b.event_id" + ) + + txn.execute(sql, (room_id, False)) + + return dict(txn) + return await self.db_pool.runInteraction( "get_oldest_events_with_depth_in_room", - self.get_oldest_events_with_depth_in_room_txn, + get_oldest_events_with_depth_in_room_txn, room_id, ) - def get_oldest_events_with_depth_in_room_txn(self, txn, room_id): - sql = ( - "SELECT b.event_id, MAX(e.depth) FROM events as e" - " INNER JOIN event_edges as g" - " ON g.event_id = e.event_id" - " INNER JOIN event_backward_extremities as b" - " ON g.prev_event_id = b.event_id" - " WHERE b.room_id = ? AND g.is_state is ?" - " GROUP BY b.event_id" - ) + async def get_insertion_event_backwards_extremities_in_room(self, room_id): + def get_insertion_event_backwards_extremities_in_room_txn(txn, room_id): + sql = """ + SELECT b.event_id, MAX(e.depth) FROM insertion_events as i + /* We only want insertion events that are also marked as backwards extremities */ + INNER JOIN event_backward_extremities as b USING (event_id) + /* Get the depth of the insertion event from the events table */ + INNER JOIN events AS e USING (event_id) + WHERE b.room_id = ? 
+ GROUP BY b.event_id + """ + + txn.execute(sql, (room_id,)) - txn.execute(sql, (room_id, False)) + return dict(txn) - return dict(txn) + return await self.db_pool.runInteraction( + "get_insertion_event_backwards_extremities_in_room", + get_insertion_event_backwards_extremities_in_room_txn, + room_id, + ) async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[str, int]: """Returns the event ID and depth for the event that has the max depth from a set of event IDs @@ -929,7 +951,7 @@ async def get_backfill_events(self, room_id: str, event_list: list, limit: int): return sorted(events, key=lambda e: -e.depth) def _get_backfill_events(self, txn, room_id, event_list, limit): - logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) + logger.info("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) event_results = set() @@ -1122,6 +1144,21 @@ def _delete_old_forward_extrem_cache_txn(txn): _delete_old_forward_extrem_cache_txn, ) + async def insert_backward_extremity(self, event_id: str, room_id: str) -> None: + def _insert_backward_extremity_txn(txn): + self.db_pool.simple_insert_txn( + txn, + table="event_backward_extremities", + values={ + "event_id": event_id, + "room_id": room_id, + }, + ) + + await self.db_pool.runInteraction( + "_insert_backward_extremity_txn", _insert_backward_extremity_txn + ) + async def insert_received_event_to_staging( self, origin: str, event: EventBase ) -> None: diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 201301440f7e..2041d49a10c0 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2091,18 +2091,21 @@ def _update_backward_extremeties(self, txn, events): for ev in events: events_by_room.setdefault(ev.room_id, []).append(ev) + # From the events passed in, add all of the prev events as backwards extremities. + # Ignore any events that are already backwards extrems or outliers. query = ( "INSERT INTO event_backward_extremities (event_id, room_id)" " SELECT ?, ? WHERE NOT EXISTS (" - " SELECT 1 FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" + " SELECT 1 FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" " )" " AND NOT EXISTS (" - " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " - " AND outlier = ?" + " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " + " AND outlier = ?" " )" ) + logger.info("_update_backward_extremeties %s", events) txn.execute_batch( query, [ @@ -2113,6 +2116,8 @@ def _update_backward_extremeties(self, txn, events): ], ) + # Delete all these events that we've already fetched and now know that their + # prev events are the new outliers. query = ( "DELETE FROM event_backward_extremities" " WHERE event_id = ? AND room_id = ?" 
diff --git a/synapse/visibility.py b/synapse/visibility.py index 490fb26e8114..e4bd4b077d37 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -366,13 +366,21 @@ def check_event_is_visible(event: EventBase, state: StateMap[EventBase]) -> bool if erased_senders: to_return = [] for e in events: - if not is_sender_erased(e, erased_senders): + erased = is_sender_erased(e, erased_senders) + logger.info( + "filter_events_for_server: (all_open) %s erased=%s", e, erased + ) + if not erased: to_return.append(e) elif redact: to_return.append(prune_event(e)) + logger.info("filter_events_for_server: (all_open) to_return=%s", to_return) return to_return + logger.info( + "filter_events_for_server: all_open and no erased senders %s", events + ) # If there are no erased users then we can just return the given list # of events without having to copy it. return events @@ -429,6 +437,9 @@ def include(typ, state_key): for e in events: erased = is_sender_erased(e, erased_senders) visible = check_event_is_visible(e, event_to_state[e.event_id]) + logger.info( + "filter_events_for_server: %s erased=%s visible=%s", e, erased, visible + ) if visible and not erased: to_return.append(e) elif redact: From d63c34c7e502672df456f3311fec87c84ad92067 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 17 Jul 2021 03:37:18 -0500 Subject: [PATCH 33/53] hs2 to better ask for insertion event extremity But running into the `sqlite3.IntegrityError: NOT NULL constraint failed: event_to_state_groups.state_group` error --- synapse/handlers/federation.py | 29 +++++++++++++++++++++-------- synapse/visibility.py | 10 ---------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index c6b3a1bf3aa1..ff44c423120e 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1116,14 +1116,19 @@ async def maybe_backfill( async def _maybe_backfill_inner( self, room_id: str, current_depth: int, limit: int ) -> bool: - oldest_events = await self.store.get_oldest_events_with_depth_in_room(room_id) + oldest_events_with_depth = ( + await self.store.get_oldest_events_with_depth_in_room(room_id) + ) insertion_events_to_be_backfilled = ( await self.store.get_insertion_event_backwards_extremities_in_room(room_id) ) - extremities = {**oldest_events, **insertion_events_to_be_backfilled} - logger.info("_maybe_backfill_inner: extremities %s", extremities) + logger.info( + "_maybe_backfill_inner: extremities oldest_events_with_depth=%s insertion_events_to_be_backfilled=%s", + oldest_events_with_depth, + insertion_events_to_be_backfilled, + ) - if not extremities: + if not oldest_events_with_depth and not insertion_events_to_be_backfilled: logger.debug("Not backfilling as no extremeties found.") return False @@ -1153,11 +1158,13 @@ async def _maybe_backfill_inner( # state *before* the event, ignoring the special casing certain event # types have. 
- forward_events = await self.store.get_successor_events(list(extremities)) - logger.info("_maybe_backfill_inner: forward_events %s", forward_events) + forward_event_ids = await self.store.get_successor_events( + list(oldest_events_with_depth) + ) + logger.info("_maybe_backfill_inner: forward_event_ids=%s", forward_event_ids) extremities_events = await self.store.get_events( - forward_events, + forward_event_ids, redact_behaviour=EventRedactBehaviour.AS_IS, get_prev_content=False, ) @@ -1176,9 +1183,15 @@ async def _maybe_backfill_inner( "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities ) - if not filtered_extremities: + if not filtered_extremities and not insertion_events_to_be_backfilled: return False + extremities = { + **oldest_events_with_depth, + # TODO: insertion_events_to_be_backfilled is currently skipping the filtered_extremities checks + **insertion_events_to_be_backfilled, + } + # Check if we reached a point where we should start backfilling. sorted_extremeties_tuple = sorted(extremities.items(), key=lambda e: -int(e[1])) max_depth = sorted_extremeties_tuple[0][1] diff --git a/synapse/visibility.py b/synapse/visibility.py index e4bd4b077d37..c0d0a53c3c41 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -367,20 +367,13 @@ def check_event_is_visible(event: EventBase, state: StateMap[EventBase]) -> bool to_return = [] for e in events: erased = is_sender_erased(e, erased_senders) - logger.info( - "filter_events_for_server: (all_open) %s erased=%s", e, erased - ) if not erased: to_return.append(e) elif redact: to_return.append(prune_event(e)) - logger.info("filter_events_for_server: (all_open) to_return=%s", to_return) return to_return - logger.info( - "filter_events_for_server: all_open and no erased senders %s", events - ) # If there are no erased users then we can just return the given list # of events without having to copy it. 
return events @@ -437,9 +430,6 @@ def include(typ, state_key): for e in events: erased = is_sender_erased(e, erased_senders) visible = check_event_is_visible(e, event_to_state[e.event_id]) - logger.info( - "filter_events_for_server: %s erased=%s visible=%s", e, erased, visible - ) if visible and not erased: to_return.append(e) elif redact: From 2196ba527be5c6109465c7be29e7ec65901d7b91 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 17 Jul 2021 03:59:39 -0500 Subject: [PATCH 34/53] Add insertion_event_extremities table --- synapse/handlers/federation.py | 2 +- synapse/storage/databases/main/event_federation.py | 10 +++++----- synapse/storage/databases/main/events.py | 12 ++++++++++++ .../main/delta/61/01insertion_event_lookups.sql | 10 ++++++++++ 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ff44c423120e..d3823e24dea7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -944,7 +944,7 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): marker_event, ) - await self.store.insert_backward_extremity( + await self.store.insert_insertion_extremity( insertion_event_id, marker_event.room_id ) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 8bbdb06c74e1..e8d513fbddce 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -692,7 +692,7 @@ def get_insertion_event_backwards_extremities_in_room_txn(txn, room_id): sql = """ SELECT b.event_id, MAX(e.depth) FROM insertion_events as i /* We only want insertion events that are also marked as backwards extremities */ - INNER JOIN event_backward_extremities as b USING (event_id) + INNER JOIN insertion_event_extremities as b USING (event_id) /* Get the depth of the insertion event from the events table */ INNER JOIN events AS e USING (event_id) WHERE b.room_id = ? @@ -1144,11 +1144,11 @@ def _delete_old_forward_extrem_cache_txn(txn): _delete_old_forward_extrem_cache_txn, ) - async def insert_backward_extremity(self, event_id: str, room_id: str) -> None: - def _insert_backward_extremity_txn(txn): + async def insert_insertion_extremity(self, event_id: str, room_id: str) -> None: + def _insert_insertion_extremity_txn(txn): self.db_pool.simple_insert_txn( txn, - table="event_backward_extremities", + table="insertion_event_extremities", values={ "event_id": event_id, "room_id": room_id, @@ -1156,7 +1156,7 @@ def _insert_backward_extremity_txn(txn): ) await self.db_pool.runInteraction( - "_insert_backward_extremity_txn", _insert_backward_extremity_txn + "_insert_insertion_extremity_txn", _insert_insertion_extremity_txn ) async def insert_received_event_to_staging( diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 2041d49a10c0..8651f374e43a 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1831,6 +1831,18 @@ def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): }, ) + # When we receive an event with a `chunk_id` referencing the + # `next_chunk_id` of the insertion event, we can remove it from the + # `insertion_event_extremities` table. + sql = """ + DELETE FROM insertion_event_extremities WHERE event_id IN ( + SELECT event_id FROM insertion_events + WHERE next_chunk_id = ? 
+ ) + """ + + txn.execute(sql, (chunk_id,)) + def _handle_redaction(self, txn, redacted_event_id): """Handles receiving a redaction and checking whether we need to remove any redacted relations from the database. diff --git a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql index 9f00b037a426..2d33663b00af 100644 --- a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql +++ b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql @@ -40,6 +40,16 @@ CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_room_id ON insertion_ CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id); CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id); +-- Add a table that keeps track of which "insertion" events need to be backfilled +CREATE TABLE IF NOT EXISTS insertion_event_extremities( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + UNIQUE (event_id) +); + +CREATE INDEX IF NOT EXISTS insertion_event_extremities_event_id ON insertion_event_extremities(event_id); +CREATE INDEX IF NOT EXISTS insertion_event_extremities_room_id ON insertion_event_extremities(room_id); + -- Add a table that keeps track of how each chunk is labeled. The chunks are -- connected together based insertion points `next_chunk_id`. CREATE TABLE IF NOT EXISTS chunk_edges( From b2be8cec393b2f37eefe5cf19c86e26575664ad6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 19 Jul 2021 23:13:33 -0500 Subject: [PATCH 35/53] Switch to chunk events so we can auth via power_levels Previously, we were using `content.chunk_id` to connect one chunk to another. But these events can be from any `sender` and we can't tell who should be able to send historical events. We know we only want the application service to do it but these events have the sender of a real historical message, not the application service user ID as the sender. Other federated homeservers also have no indicator which senders are an application service on the originating homeserver. So we want to auth all of the MSC2716 events via power_levels and have them be sent by the application service with proper PL levels in the room. 
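As a rough sketch of the event shapes this produces (the IDs and server names here are made up for illustration; only the `org.matrix.msc2716.*` field names come from this change):

```python
# Hypothetical example data. The application service is the sender of the
# "chunk" and "insertion" events themselves, so they can be authed via
# power_levels instead of trusting the spoofable historical `sender`s.
chunk_event = {
    "type": "org.matrix.msc2716.chunk",
    "sender": "@appservice:example.org",
    "room_id": "!abc:example.org",
    "content": {
        # Connects this chunk to the insertion event of the previous chunk
        "org.matrix.msc2716.chunk_id": "chunk_abc",
    },
}

insertion_event = {
    "type": "org.matrix.msc2716.insertion",
    "sender": "@appservice:example.org",
    "room_id": "!abc:example.org",
    "content": {
        # The next chunk to be inserted will point back at this ID
        "org.matrix.msc2716.next_chunk_id": "chunk_def",
    },
}
```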
--- synapse/api/constants.py | 6 ++++-- synapse/rest/client/v1/room.py | 17 +++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 78364100e153..fba96822af16 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -120,6 +120,7 @@ class EventTypes: SpaceParent = "m.space.parent" MSC2716_INSERTION = "org.matrix.msc2716.insertion" + MSC2716_CHUNK = "org.matrix.msc2716.chunk" MSC2716_MARKER = "org.matrix.msc2716.marker" @@ -190,9 +191,10 @@ class EventContentFields: # Used on normal messages to indicate they were historically imported after the fact MSC2716_HISTORICAL = "org.matrix.msc2716.historical" - # For "insertion" events + # For "insertion" events to indicate what the next chunk ID should be in + # order to connect to it MSC2716_NEXT_CHUNK_ID = "org.matrix.msc2716.next_chunk_id" - # Used on normal message events to indicate where the chunk connects to + # Used on "chunk" events to indicate which insertion event it connects to MSC2716_CHUNK_ID = "org.matrix.msc2716.chunk_id" # For "marker" events MSC2716_MARKER_INSERTION = "org.matrix.msc2716.marker.insertion" diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index ebf4e3223089..d4e16560faaa 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -553,9 +553,18 @@ async def on_POST(self, request, room_id): ] # Connect this current chunk to the insertion event from the previous chunk - last_event_in_chunk["content"][ - EventContentFields.MSC2716_CHUNK_ID - ] = chunk_id_to_connect_to + chunk_event = { + "type": EventTypes.MSC2716_CHUNK, + "sender": requester.user.to_string(), + "room_id": room_id, + "content": {EventContentFields.MSC2716_CHUNK_ID: chunk_id_to_connect_to}, + # Since the chunk event is put at the end of the chunk, + # where the newest-in-time event is, copy the origin_server_ts from + # the last event we're inserting + "origin_server_ts": last_event_in_chunk["origin_server_ts"], + } + # Add the chunk event to the end of the chunk (newest-in-time) + events_to_create.append(chunk_event) # Add an "insertion" event to the start of each chunk (next to the oldest-in-time # event in the chunk) so the next chunk can be connected to this one. 
@@ -567,7 +576,7 @@ async def on_POST(self, request, room_id): # the first event we're inserting origin_server_ts=events_to_create[0]["origin_server_ts"], ) - # Prepend the insertion event to the start of the chunk + # Prepend the insertion event to the start of the chunk (oldest-in-time) events_to_create = [insertion_event] + events_to_create event_ids = [] From 04a29fed4700637a90b04d7bedded3584605af0b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Jul 2021 00:48:08 -0500 Subject: [PATCH 36/53] Switch to chunk events for federation --- .../storage/databases/main/event_federation.py | 2 +- synapse/storage/databases/main/events.py | 18 +++++++++++------- .../delta/61/01insertion_event_lookups.sql | 6 +++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 37b99c7f8111..baef3e007237 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -962,7 +962,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): chunk_connection_query = """ SELECT e.depth, c.event_id FROM insertion_events AS i /* Find the chunk that connects to the given insertion event */ - INNER JOIN chunk_edges AS c + INNER JOIN chunk_events AS c ON i.next_chunk_id = c.chunk_id /* Get the depth of the chunk start event from the events table */ INNER JOIN events AS e USING (event_id) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 081d79f1acf2..466aa4e8d86e 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1505,7 +1505,7 @@ def _update_metadata_tables_txn( self._handle_event_relations(txn, event) self._handle_insertion_event(txn, event) - self._handle_chunk_id(txn, event) + self._handle_chunk_event(txn, event) # Store the labels for this event. labels = event.content.get(EventContentFields.LABELS) @@ -1804,26 +1804,30 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): }, ) - def _handle_chunk_id(self, txn: LoggingTransaction, event: EventBase): - """Handles inserting the chunk edges/connections between the event at the - start of a chunk and an insertion event. Part of MSC2716. + def _handle_chunk_event(self, txn: LoggingTransaction, event: EventBase): + """Handles inserting the chunk edges/connections between the chunk event + and an insertion event. Part of MSC2716. 
Args: txn: The database transaction object event: The event to process """ + if event.type != EventTypes.MSC2716_CHUNK: + # Not a chunk event + return + chunk_id = event.content.get(EventContentFields.MSC2716_CHUNK_ID) if chunk_id is None: - # No chunk connection to persist + # Invalid chunk event without a chunk ID return - logger.debug("_handle_chunk_id %s %s", chunk_id, event) + logger.debug("_handle_chunk_event chunk_id=%s %s", chunk_id, event) # Keep track of the insertion event and the chunk ID self.db_pool.simple_insert_txn( txn, - table="chunk_edges", + table="chunk_events", values={ "event_id": event.event_id, "room_id": event.room_id, diff --git a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql index 9f00b037a426..70c05b1b583b 100644 --- a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql +++ b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql @@ -41,12 +41,12 @@ CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edg CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id); -- Add a table that keeps track of how each chunk is labeled. The chunks are --- connected together based insertion points `next_chunk_id`. -CREATE TABLE IF NOT EXISTS chunk_edges( +-- connected together based on an insertion events `next_chunk_id`. +CREATE TABLE IF NOT EXISTS chunk_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, chunk_id TEXT NOT NULL, UNIQUE (event_id) ); -CREATE INDEX IF NOT EXISTS chunk_edges_chunk_id ON chunk_edges(chunk_id); +CREATE INDEX IF NOT EXISTS chunk_events_chunk_id ON chunk_events(chunk_id); From 258fa57d7ea956c1aedf94d72d1273a098dfde73 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Jul 2021 01:41:44 -0500 Subject: [PATCH 37/53] Add unstable room version to support new historical PL --- synapse/api/room_versions.py | 27 +++++++++++++++++++++++++ synapse/event_auth.py | 39 ++++++++++++++++++++++++++++++++++++ synapse/events/utils.py | 3 +++ synapse/handlers/room.py | 1 + 4 files changed, 70 insertions(+) diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index f6c1c97b40ca..e9491065c857 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -73,6 +73,9 @@ class RoomVersion: # MSC2403: Allows join_rules to be set to 'knock', changes auth rules to allow sending # m.room.membership event with membership 'knock'. 
msc2403_knocking = attr.ib(type=bool) + # MSC2716: Adds m.room.power_levels -> content.historical field to control + # whether "insertion", "chunk", "marker" events can be sent + msc2716_historical = attr.ib(type=bool) class RoomVersions: @@ -88,6 +91,7 @@ class RoomVersions: msc2176_redaction_rules=False, msc3083_join_rules=False, msc2403_knocking=False, + msc2716_historical=False, ) V2 = RoomVersion( "2", @@ -101,6 +105,7 @@ class RoomVersions: msc2176_redaction_rules=False, msc3083_join_rules=False, msc2403_knocking=False, + msc2716_historical=False, ) V3 = RoomVersion( "3", @@ -114,6 +119,7 @@ class RoomVersions: msc2176_redaction_rules=False, msc3083_join_rules=False, msc2403_knocking=False, + msc2716_historical=False, ) V4 = RoomVersion( "4", @@ -127,6 +133,7 @@ class RoomVersions: msc2176_redaction_rules=False, msc3083_join_rules=False, msc2403_knocking=False, + msc2716_historical=False, ) V5 = RoomVersion( "5", @@ -140,6 +147,7 @@ class RoomVersions: msc2176_redaction_rules=False, msc3083_join_rules=False, msc2403_knocking=False, + msc2716_historical=False, ) V6 = RoomVersion( "6", @@ -153,6 +161,7 @@ class RoomVersions: msc2176_redaction_rules=False, msc3083_join_rules=False, msc2403_knocking=False, + msc2716_historical=False, ) MSC2176 = RoomVersion( "org.matrix.msc2176", @@ -166,6 +175,7 @@ class RoomVersions: msc2176_redaction_rules=True, msc3083_join_rules=False, msc2403_knocking=False, + msc2716_historical=False, ) MSC3083 = RoomVersion( "org.matrix.msc3083", @@ -179,6 +189,7 @@ class RoomVersions: msc2176_redaction_rules=False, msc3083_join_rules=True, msc2403_knocking=False, + msc2716_historical=False, ) V7 = RoomVersion( "7", @@ -192,6 +203,21 @@ class RoomVersions: msc2176_redaction_rules=False, msc3083_join_rules=False, msc2403_knocking=True, + msc2716_historical=False, + ) + MSC2716 = RoomVersion( + "org.matrix.msc2716", + RoomDisposition.STABLE, + EventFormatVersions.V3, + StateResolutionVersions.V2, + enforce_key_validity=True, + special_case_aliases_auth=False, + strict_canonicaljson=True, + limit_notifications_power_levels=True, + msc2176_redaction_rules=False, + msc3083_join_rules=False, + msc2403_knocking=True, + msc2716_historical=True, ) @@ -207,6 +233,7 @@ class RoomVersions: RoomVersions.MSC2176, RoomVersions.MSC3083, RoomVersions.V7, + RoomVersions.MSC2716, ) # Note that we do not include MSC2043 here unless it is enabled in the config. } # type: Dict[str, RoomVersion] diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 89bcf8151589..f2df0f0b7aaf 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -190,6 +190,13 @@ def check( if event.type == EventTypes.Redaction: check_redaction(room_version_obj, event, auth_events) + if ( + event.type == EventTypes.MSC2716_INSERTION + or event.type == EventTypes.MSC2716_CHUNK + or event.type == EventTypes.MSC2716_MARKER + ): + check_historical(room_version_obj, event, auth_events) + logger.debug("Allowing! %s", event) @@ -501,6 +508,38 @@ def check_redaction( raise AuthError(403, "You don't have permission to redact events") +def check_historical( + room_version_obj: RoomVersion, + event: EventBase, + auth_events: StateMap[EventBase], +) -> None: + """Check whether the event sender is allowed to send historical related + events like "insertion", "chunk", and "marker". + + Returns: + None + + Raises: + AuthError if the event sender is not allowed to send historical related events + ("insertion", "chunk", and "marker"). 
+ """ + if not room_version_obj.msc2716_historical: + raise AuthError( + 403, + "Historical events not supported with your room version", + ) + + user_level = get_user_power_level(event.user_id, auth_events) + + historical_level = _get_named_level(auth_events, "historical", 100) + + if user_level < historical_level: + raise AuthError( + 403, + 'You don\'t have permission to send send historical related events ("insertion", "chunk", and "marker")', + ) + + def _check_power_levels( room_version_obj: RoomVersion, event: EventBase, diff --git a/synapse/events/utils.py b/synapse/events/utils.py index ec96999e4e77..21637bd0c7e2 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -124,6 +124,9 @@ def add_fields(*fields): if room_version.msc2176_redaction_rules: add_fields("invite") + if room_version.msc2716_historical: + add_fields("historical") + elif event_type == EventTypes.Aliases and room_version.special_case_aliases_auth: add_fields("aliases") elif event_type == EventTypes.RoomHistoryVisibility: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 579b1b93c5fa..c3f41d8808cb 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -955,6 +955,7 @@ async def send(etype: str, content: JsonDict, **kwargs) -> int: "kick": 50, "redact": 50, "invite": 50, + "historical": 100, } # type: JsonDict if config["original_invitees_have_ops"]: From 187ab28611546321e02770944c86f30ee2bc742a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Jul 2021 19:56:44 -0500 Subject: [PATCH 38/53] Messy: Fix undefined state_group for federated historical events ``` 2021-07-13 02:27:57,810 - synapse.handlers.federation - 1248 - ERROR - GET-4 - Failed to backfill from hs1 because NOT NULL constraint failed: event_to_state_groups.state_group Traceback (most recent call last): File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 1216, in try_backfill await self.backfill( File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 1035, in backfill await self._auth_and_persist_event(dest, event, context, backfilled=True) File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 2222, in _auth_and_persist_event await self._run_push_actions_and_persist_event(event, context, backfilled) File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 2244, in _run_push_actions_and_persist_event await self.persist_events_and_notify( File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 3290, in persist_events_and_notify events, max_stream_token = await self.storage.persistence.persist_events( File "/usr/local/lib/python3.8/site-packages/synapse/logging/opentracing.py", line 774, in _trace_inner return await func(*args, **kwargs) File "/usr/local/lib/python3.8/site-packages/synapse/storage/persist_events.py", line 320, in persist_events ret_vals = await yieldable_gather_results(enqueue, partitioned.items()) File "/usr/local/lib/python3.8/site-packages/synapse/storage/persist_events.py", line 237, in handle_queue_loop ret = await self._per_item_callback( File "/usr/local/lib/python3.8/site-packages/synapse/storage/persist_events.py", line 577, in _persist_event_batch await self.persist_events_store._persist_events_and_state_updates( File "/usr/local/lib/python3.8/site-packages/synapse/storage/databases/main/events.py", line 176, in _persist_events_and_state_updates await self.db_pool.runInteraction( File 
"/usr/local/lib/python3.8/site-packages/synapse/storage/database.py", line 681, in runInteraction result = await self.runWithConnection( File "/usr/local/lib/python3.8/site-packages/synapse/storage/database.py", line 770, in runWithConnection return await make_deferred_yieldable( File "/usr/local/lib/python3.8/site-packages/twisted/python/threadpool.py", line 238, in inContext result = inContext.theWork() # type: ignore[attr-defined] File "/usr/local/lib/python3.8/site-packages/twisted/python/threadpool.py", line 254, in inContext.theWork = lambda: context.call( # type: ignore[attr-defined] File "/usr/local/lib/python3.8/site-packages/twisted/python/context.py", line 118, in callWithContext return self.currentContext().callWithContext(ctx, func, *args, **kw) File "/usr/local/lib/python3.8/site-packages/twisted/python/context.py", line 83, in callWithContext return func(*args, **kw) File "/usr/local/lib/python3.8/site-packages/twisted/enterprise/adbapi.py", line 293, in _runWithConnection compat.reraise(excValue, excTraceback) File "/usr/local/lib/python3.8/site-packages/twisted/python/deprecate.py", line 298, in deprecatedFunction return function(*args, **kwargs) File "/usr/local/lib/python3.8/site-packages/twisted/python/compat.py", line 403, in reraise raise exception.with_traceback(traceback) File "/usr/local/lib/python3.8/site-packages/twisted/enterprise/adbapi.py", line 284, in _runWithConnection result = func(conn, *args, **kw) File "/usr/local/lib/python3.8/site-packages/synapse/storage/database.py", line 765, in inner_func return func(db_conn, *args, **kwargs) File "/usr/local/lib/python3.8/site-packages/synapse/storage/database.py", line 549, in new_transaction r = func(cursor, *args, **kwargs) File "/usr/local/lib/python3.8/site-packages/synapse/logging/utils.py", line 69, in wrapped return f(*args, **kwargs) File "/usr/local/lib/python3.8/site-packages/synapse/storage/databases/main/events.py", line 385, in _persist_events_txn self._store_event_state_mappings_txn(txn, events_and_contexts) File "/usr/local/lib/python3.8/site-packages/synapse/storage/databases/main/events.py", line 2065, in _store_event_state_mappings_txn self.db_pool.simple_insert_many_txn( File "/usr/local/lib/python3.8/site-packages/synapse/storage/database.py", line 923, in simple_insert_many_txn txn.execute_batch(sql, vals) File "/usr/local/lib/python3.8/site-packages/synapse/storage/database.py", line 280, in execute_batch self.executemany(sql, args) File "/usr/local/lib/python3.8/site-packages/synapse/storage/database.py", line 300, in executemany self._do_execute(self.txn.executemany, sql, *args) File "/usr/local/lib/python3.8/site-packages/synapse/storage/database.py", line 330, in _do_execute return func(sql, *args) sqlite3.IntegrityError: NOT NULL constraint failed: event_to_state_groups.state_group ``` --- synapse/handlers/federation.py | 68 +++++++++++++++++++++--- synapse/state/__init__.py | 16 ++++++ synapse/storage/databases/main/events.py | 9 ++++ 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index d3823e24dea7..11b500f15179 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -924,7 +924,11 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): origin, marker_event.room_id, [insertion_event_id], + # outlier=False, ) + # await self._get_state_after_missing_prev_event( + # origin, marker_event.room_id, insertion_event_id + # ) insertion_event = await 
self.store.get_event( insertion_event_id, allow_none=True @@ -1078,15 +1082,27 @@ async def backfill( # Step 2: Persist the rest of the events in the chunk one by one events.sort(key=lambda e: e.depth) + logger.info("backfill: events=%s", events) for event in events: if event in events_to_state: continue # For paranoia we ensure that these events are marked as # non-outliers + logger.info( + "backfill: persist event_id=%s (%s) outlier=%s", + event.event_id, + event.type, + event.internal_metadata.is_outlier(), + ) assert not event.internal_metadata.is_outlier() context = await self.state_handler.compute_event_context(event) + logger.info( + "backfill: context event_id=%s state_group=%s", + event.event_id, + context.state_group, + ) # We store these one at a time since each event depends on the # previous to work out the state. @@ -1383,7 +1399,12 @@ async def try_backfill(domains: List[str]) -> bool: return False async def _get_events_and_persist( - self, destination: str, room_id: str, events: Iterable[str] + self, + destination: str, + room_id: str, + events: Iterable[str], + # TODO: check if still used + outlier: bool = True, ) -> None: """Fetch the given events from a server, and persist them as outliers. @@ -1405,7 +1426,7 @@ async def get_event(event_id: str): [destination], event_id, room_version, - outlier=True, + outlier=outlier, ) if event is None: logger.warning( @@ -2278,6 +2299,11 @@ async def _auth_and_persist_event( server. backfilled: True if the event was backfilled. """ + logger.info( + "_auth_and_persist_event: before event_id=%s state_group=%s", + event.event_id, + context.state_group, + ) context = await self._check_event_auth( origin, event, @@ -2286,6 +2312,11 @@ async def _auth_and_persist_event( auth_events=auth_events, backfilled=backfilled, ) + logger.info( + "_auth_and_persist_event: after event_id=%s state_group=%s", + event.event_id, + context.state_group, + ) await self._run_push_actions_and_persist_event(event, context, backfilled) @@ -2667,9 +2698,19 @@ async def _check_event_auth( auth_events[(c.type, c.state_key)] = c try: + logger.info( + "_check_event_auth: before event_id=%s state_group=%s", + event.event_id, + context.state_group, + ) context = await self._update_auth_events_and_context_for_auth( origin, event, context, auth_events ) + logger.info( + "_check_event_auth: after event_id=%s state_group=%s", + event.event_id, + context.state_group, + ) except Exception: # We don't really mind if the above fails, so lets not fail # processing if it does. However, it really shouldn't fail so @@ -2756,7 +2797,11 @@ async def _update_auth_events_and_context_for_auth( if missing_auth: # If we don't have all the auth events, we need to get them. - logger.info("auth_events contains unknown events: %s", missing_auth) + logger.info( + "auth_events contains unknown events for event_id=%s, missing_auth=%s", + event.event_id, + missing_auth, + ) try: try: remote_auth_chain = await self.federation_client.get_event_auth( @@ -2793,9 +2838,13 @@ async def _update_auth_events_and_context_for_auth( event.event_id, e.event_id, ) - context = await self.state_handler.compute_event_context(e) + # XXX: Main fix is here. 
It was computing context for the missing auth event + # and re-assigning to the `context` variable used for the main event + missing_auth_context = ( + await self.state_handler.compute_event_context(e) + ) await self._auth_and_persist_event( - origin, e, context, auth_events=auth + origin, e, missing_auth_context, auth_events=auth ) if e.event_id in event_auth_events: @@ -2806,13 +2855,20 @@ async def _update_auth_events_and_context_for_auth( except Exception: logger.exception("Failed to get auth chain") + logger.info( + "_update_auth_events_and_context_for_auth: check outlier event_id=%s outlier=%s", + event.event_id, + event.internal_metadata.is_outlier(), + ) if event.internal_metadata.is_outlier(): # XXX: given that, for an outlier, we'll be working with the # event's *claimed* auth events rather than those we calculated: # (a) is there any point in this test, since different_auth below will # obviously be empty # (b) alternatively, why don't we do it earlier? - logger.info("Skipping auth_event fetch for outlier") + logger.info( + "Skipping auth_event fetch for outlier event_id=%s", event.event_id + ) return context different_auth = event_auth_events.difference( diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index a1770f620e59..98565156a65b 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -324,6 +324,13 @@ async def compute_event_context( entry = await self.resolve_state_groups_for_events( event.room_id, event.prev_event_ids() ) + logger.info( + "compute_event_context: resolve_state_groups_for_events\nstate_ids_before_event=%s\nstate_group_before_event=%s\nstate_group_before_event_prev_group=%s\ndeltas_to_state_group_before_event=%s", + entry.state, + entry.state_group, + entry.prev_group, + entry.delta_ids, + ) state_ids_before_event = entry.state state_group_before_event = entry.state_group @@ -359,6 +366,10 @@ async def compute_event_context( # if not event.is_state(): + logger.info( + "compute_event_context: returning with state_group_before_event=%s", + state_group_before_event, + ) return EventContext.with_state( state_group_before_event=state_group_before_event, state_group=state_group_before_event, @@ -390,6 +401,11 @@ async def compute_event_context( current_state_ids=state_ids_after_event, ) + logger.info( + "compute_event_context: after\nstate_group_after_event=%s", + state_group_after_event, + ) + return EventContext.with_state( state_group=state_group_after_event, state_group_before_event=state_group_before_event, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c3b6164c360a..84270e771772 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2032,6 +2032,13 @@ def _store_event_state_mappings_txn( ): state_groups = {} for event, context in events_and_contexts: + + logger.info( + "creating state_groups grsesegr event_id=%s outlier=%s %s", + event.event_id, + event.internal_metadata.is_outlier(), + event, + ) if event.internal_metadata.is_outlier(): continue @@ -2043,6 +2050,8 @@ def _store_event_state_mappings_txn( state_groups[event.event_id] = context.state_group + logger.info("state_groups asdfasdf %s", state_groups) + self.db_pool.simple_insert_many_txn( txn, table="event_to_state_groups", From 9d70e95094e161cf84fcaf6187c1fa431807b5b5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Jul 2021 19:58:11 -0500 Subject: [PATCH 39/53] Revert "Messy: Fix undefined state_group for federated historical events" This reverts 
commit 187ab28611546321e02770944c86f30ee2bc742a. --- synapse/handlers/federation.py | 68 +++--------------------- synapse/state/__init__.py | 16 ------ synapse/storage/databases/main/events.py | 9 ---- 3 files changed, 6 insertions(+), 87 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 11b500f15179..d3823e24dea7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -924,11 +924,7 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): origin, marker_event.room_id, [insertion_event_id], - # outlier=False, ) - # await self._get_state_after_missing_prev_event( - # origin, marker_event.room_id, insertion_event_id - # ) insertion_event = await self.store.get_event( insertion_event_id, allow_none=True @@ -1082,27 +1078,15 @@ async def backfill( # Step 2: Persist the rest of the events in the chunk one by one events.sort(key=lambda e: e.depth) - logger.info("backfill: events=%s", events) for event in events: if event in events_to_state: continue # For paranoia we ensure that these events are marked as # non-outliers - logger.info( - "backfill: persist event_id=%s (%s) outlier=%s", - event.event_id, - event.type, - event.internal_metadata.is_outlier(), - ) assert not event.internal_metadata.is_outlier() context = await self.state_handler.compute_event_context(event) - logger.info( - "backfill: context event_id=%s state_group=%s", - event.event_id, - context.state_group, - ) # We store these one at a time since each event depends on the # previous to work out the state. @@ -1399,12 +1383,7 @@ async def try_backfill(domains: List[str]) -> bool: return False async def _get_events_and_persist( - self, - destination: str, - room_id: str, - events: Iterable[str], - # TODO: check if still used - outlier: bool = True, + self, destination: str, room_id: str, events: Iterable[str] ) -> None: """Fetch the given events from a server, and persist them as outliers. @@ -1426,7 +1405,7 @@ async def get_event(event_id: str): [destination], event_id, room_version, - outlier=outlier, + outlier=True, ) if event is None: logger.warning( @@ -2299,11 +2278,6 @@ async def _auth_and_persist_event( server. backfilled: True if the event was backfilled. """ - logger.info( - "_auth_and_persist_event: before event_id=%s state_group=%s", - event.event_id, - context.state_group, - ) context = await self._check_event_auth( origin, event, @@ -2312,11 +2286,6 @@ async def _auth_and_persist_event( auth_events=auth_events, backfilled=backfilled, ) - logger.info( - "_auth_and_persist_event: after event_id=%s state_group=%s", - event.event_id, - context.state_group, - ) await self._run_push_actions_and_persist_event(event, context, backfilled) @@ -2698,19 +2667,9 @@ async def _check_event_auth( auth_events[(c.type, c.state_key)] = c try: - logger.info( - "_check_event_auth: before event_id=%s state_group=%s", - event.event_id, - context.state_group, - ) context = await self._update_auth_events_and_context_for_auth( origin, event, context, auth_events ) - logger.info( - "_check_event_auth: after event_id=%s state_group=%s", - event.event_id, - context.state_group, - ) except Exception: # We don't really mind if the above fails, so lets not fail # processing if it does. However, it really shouldn't fail so @@ -2797,11 +2756,7 @@ async def _update_auth_events_and_context_for_auth( if missing_auth: # If we don't have all the auth events, we need to get them. 
- logger.info( - "auth_events contains unknown events for event_id=%s, missing_auth=%s", - event.event_id, - missing_auth, - ) + logger.info("auth_events contains unknown events: %s", missing_auth) try: try: remote_auth_chain = await self.federation_client.get_event_auth( @@ -2838,13 +2793,9 @@ async def _update_auth_events_and_context_for_auth( event.event_id, e.event_id, ) - # XXX: Main fix is here. It was computing context for the missing auth event - # and re-assigning to the `context` variable used for the main event - missing_auth_context = ( - await self.state_handler.compute_event_context(e) - ) + context = await self.state_handler.compute_event_context(e) await self._auth_and_persist_event( - origin, e, missing_auth_context, auth_events=auth + origin, e, context, auth_events=auth ) if e.event_id in event_auth_events: @@ -2855,20 +2806,13 @@ async def _update_auth_events_and_context_for_auth( except Exception: logger.exception("Failed to get auth chain") - logger.info( - "_update_auth_events_and_context_for_auth: check outlier event_id=%s outlier=%s", - event.event_id, - event.internal_metadata.is_outlier(), - ) if event.internal_metadata.is_outlier(): # XXX: given that, for an outlier, we'll be working with the # event's *claimed* auth events rather than those we calculated: # (a) is there any point in this test, since different_auth below will # obviously be empty # (b) alternatively, why don't we do it earlier? - logger.info( - "Skipping auth_event fetch for outlier event_id=%s", event.event_id - ) + logger.info("Skipping auth_event fetch for outlier") return context different_auth = event_auth_events.difference( diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 98565156a65b..a1770f620e59 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -324,13 +324,6 @@ async def compute_event_context( entry = await self.resolve_state_groups_for_events( event.room_id, event.prev_event_ids() ) - logger.info( - "compute_event_context: resolve_state_groups_for_events\nstate_ids_before_event=%s\nstate_group_before_event=%s\nstate_group_before_event_prev_group=%s\ndeltas_to_state_group_before_event=%s", - entry.state, - entry.state_group, - entry.prev_group, - entry.delta_ids, - ) state_ids_before_event = entry.state state_group_before_event = entry.state_group @@ -366,10 +359,6 @@ async def compute_event_context( # if not event.is_state(): - logger.info( - "compute_event_context: returning with state_group_before_event=%s", - state_group_before_event, - ) return EventContext.with_state( state_group_before_event=state_group_before_event, state_group=state_group_before_event, @@ -401,11 +390,6 @@ async def compute_event_context( current_state_ids=state_ids_after_event, ) - logger.info( - "compute_event_context: after\nstate_group_after_event=%s", - state_group_after_event, - ) - return EventContext.with_state( state_group=state_group_after_event, state_group_before_event=state_group_before_event, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 84270e771772..c3b6164c360a 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2032,13 +2032,6 @@ def _store_event_state_mappings_txn( ): state_groups = {} for event, context in events_and_contexts: - - logger.info( - "creating state_groups grsesegr event_id=%s outlier=%s %s", - event.event_id, - event.internal_metadata.is_outlier(), - event, - ) if event.internal_metadata.is_outlier(): continue @@ -2050,8 
+2043,6 @@ def _store_event_state_mappings_txn( state_groups[event.event_id] = context.state_group - logger.info("state_groups asdfasdf %s", state_groups) - self.db_pool.simple_insert_many_txn( txn, table="event_to_state_groups", From 935263537d07d168852571867e06dafbee01dddb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Jul 2021 04:45:48 -0500 Subject: [PATCH 40/53] Fix federated events being rejected for no state_groups Add fix from https://github.com/matrix-org/synapse/pull/10439 until it merges. --- synapse/handlers/federation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 991ec9919a95..827853db3937 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2711,9 +2711,11 @@ async def _update_auth_events_and_context_for_auth( event.event_id, e.event_id, ) - context = await self.state_handler.compute_event_context(e) + missing_auth_event_context = ( + await self.state_handler.compute_event_context(e) + ) await self._auth_and_persist_event( - origin, e, context, auth_events=auth + origin, e, missing_auth_event_context, auth_events=auth ) if e.event_id in event_auth_events: From f115aec71b07c57d2069bcb299a7d51a79b34552 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Jul 2021 20:57:29 -0500 Subject: [PATCH 41/53] Adapting to experimental room version --- synapse/handlers/federation.py | 12 ++++++++++++ synapse/storage/databases/main/events.py | 2 +- synapse/visibility.py | 3 +-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3bf33bde5c98..04cb32097980 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -906,6 +906,12 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): # Not a marker event return + # Skip processing a marker event if the room version doesn't + # support it. + room_version = await self.store.get_room_version(marker_event.room_id) + if not room_version.msc2716_historical: + return + logger.info("_handle_marker_event: received %s", marker_event) insertion_event_id = marker_event.content.get( @@ -948,6 +954,12 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): insertion_event_id, marker_event.room_id ) + logger.info( + "_handle_marker_event: insertion extremity added %s from marker event %s", + insertion_event, + marker_event, + ) + async def _resync_device(self, sender: str) -> None: """We have detected that the device list for the given user may be out of sync, so we try and resync them. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ccc641d8e5fc..f2f82ffe86ea 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2139,7 +2139,7 @@ def _update_backward_extremeties(self, txn, events): ) # Delete all these events that we've already fetched and now know that their - # prev events are the new outliers. + # prev events are the new backwards extremeties. query = ( "DELETE FROM event_backward_extremities" " WHERE event_id = ? AND room_id = ?" 
diff --git a/synapse/visibility.py b/synapse/visibility.py index 620b3c5b0f91..17532059e9f8 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -366,8 +366,7 @@ def check_event_is_visible(event: EventBase, state: StateMap[EventBase]) -> bool if erased_senders: to_return = [] for e in events: - erased = is_sender_erased(e, erased_senders) - if not erased: + if not is_sender_erased(e, erased_senders): to_return.append(e) elif redact: to_return.append(prune_event(e)) From b55315f8b3a6cdbf7b098075a607c883feae32e0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Jul 2021 21:42:44 -0500 Subject: [PATCH 42/53] Some log cleanup --- synapse/handlers/federation.py | 26 +++++++++---------- .../databases/main/event_federation.py | 8 +++--- synapse/storage/databases/main/events.py | 3 +-- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 04cb32097980..5625c02ce210 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -895,11 +895,11 @@ async def _process_received_pdu( async def _handle_marker_event(self, origin: str, marker_event: EventBase): """Handles backfilling the insertion event when we receive a marker - event that points to one + event that points to one. Args: origin: Origin of the event. Will be called to get the insertion event - event: The event to process + marker_event: The event to process """ if marker_event.type != EventTypes.MSC2716_MARKER: @@ -912,7 +912,7 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): if not room_version.msc2716_historical: return - logger.info("_handle_marker_event: received %s", marker_event) + logger.debug("_handle_marker_event: received %s", marker_event) insertion_event_id = marker_event.content.get( EventContentFields.MSC2716_MARKER_INSERTION @@ -922,7 +922,7 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): # Nothing to retrieve then (invalid marker) return - logger.info( + logger.debug( "_handle_marker_event: backfilling insertion event %s", insertion_event_id ) @@ -944,8 +944,8 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): ) return - logger.info( - "_handle_marker_event: Succesfully backfilled insertion event %s from marker event %s", + logger.debug( + "_handle_marker_event: succesfully backfilled insertion event %s from marker event %s", insertion_event, marker_event, ) @@ -954,8 +954,8 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): insertion_event_id, marker_event.room_id ) - logger.info( - "_handle_marker_event: insertion extremity added %s from marker event %s", + logger.debug( + "_handle_marker_event: insertion extremity added for %s from marker event %s", insertion_event, marker_event, ) @@ -1134,7 +1134,7 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled = ( await self.store.get_insertion_event_backwards_extremities_in_room(room_id) ) - logger.info( + logger.debug( "_maybe_backfill_inner: extremities oldest_events_with_depth=%s insertion_events_to_be_backfilled=%s", oldest_events_with_depth, insertion_events_to_be_backfilled, @@ -1173,14 +1173,12 @@ async def _maybe_backfill_inner( forward_event_ids = await self.store.get_successor_events( list(oldest_events_with_depth) ) - logger.info("_maybe_backfill_inner: forward_event_ids=%s", forward_event_ids) extremities_events = await self.store.get_events( forward_event_ids, redact_behaviour=EventRedactBehaviour.AS_IS, 
get_prev_content=False, ) - logger.info("_maybe_backfill_inner: extremities_events %s", extremities_events) # We set `check_history_visibility_only` as we might otherwise get false # positives from users having been erased. @@ -1191,7 +1189,7 @@ async def _maybe_backfill_inner( redact=False, check_history_visibility_only=True, ) - logger.info( + logger.debug( "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities ) @@ -1218,7 +1216,7 @@ async def _maybe_backfill_inner( # much larger factor will result in triggering a backfill request much # earlier than necessary. if current_depth - 2 * limit > max_depth: - logger.info( + logger.debug( "Not backfilling as we don't need to. %d < %d - 2 * %d", max_depth, current_depth, @@ -1226,7 +1224,7 @@ async def _maybe_backfill_inner( ) return False - logger.info( + logger.debug( "room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s", room_id, current_depth, diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 102b193d0397..23e5a8d03565 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -951,7 +951,7 @@ async def get_backfill_events(self, room_id: str, event_list: list, limit: int): return sorted(events, key=lambda e: -e.depth) def _get_backfill_events(self, txn, room_id, event_list, limit): - logger.info("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) + logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) event_results = set() @@ -1034,7 +1034,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event_query, (event_id, limit - len(event_results)) ) connected_insertion_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: connected_insertion_event_query %s", connected_insertion_event_id_results, ) @@ -1049,7 +1049,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): (connected_insertion_event, limit - len(event_results)), ) chunk_start_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: chunk_start_event_id_results %s", chunk_start_event_id_results, ) @@ -1059,7 +1059,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): txn.execute(query, (event_id, False, limit - len(event_results))) prev_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: prev_event_ids %s", prev_event_id_results ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f2f82ffe86ea..40b53274fb3d 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1781,7 +1781,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Invalid insertion event without next chunk ID return - logger.info( + logger.debug( "_handle_insertion_event (next_chunk_id=%s) %s", next_chunk_id, event ) @@ -2127,7 +2127,6 @@ def _update_backward_extremeties(self, txn, events): " )" ) - logger.info("_update_backward_extremeties %s", events) txn.execute_batch( query, [ From 088c3ef5bc50b3d8d4a5aff650e3fe914371305f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Jul 2021 22:22:12 -0500 Subject: [PATCH 43/53] Add better comments around extremity fetching code and why --- .../databases/main/event_federation.py | 68 ++++++++++++++++--- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git 
a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 23e5a8d03565..d32422043129 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -665,17 +665,47 @@ def _get_auth_chain_difference_txn( # Return all events where not all sets can reach them. return {eid for eid, n in event_to_missing_sets.items() if n} - async def get_oldest_events_with_depth_in_room(self, room_id): + async def get_oldest_events_with_depth_in_room(self, room_id) -> Dict[str, int]: + """Gets the oldest events in the room with depth. + + We use this function so that we can compare and see if someones current + depth at their current scrollback is within pagination range of the + event extremeties. If the current depth is close to the depth of given + oldest event, we can trigger a backfill. + + Args: + room_id: Room where we want to find the oldest events + + Returns: + Map from event_id to depth + """ + def get_oldest_events_with_depth_in_room_txn(txn, room_id): - sql = ( - "SELECT b.event_id, MAX(e.depth) FROM events as e" - " INNER JOIN event_edges as g" - " ON g.event_id = e.event_id" - " INNER JOIN event_backward_extremities as b" - " ON g.prev_event_id = b.event_id" - " WHERE b.room_id = ? AND g.is_state is ?" - " GROUP BY b.event_id" - ) + # Assemble a dictionary with event_id -> depth for the oldest events + # we know of in the room. Backwards extremeties are the oldest + # events we know of in the room but we only know of them because + # some other event referenced them by prev_event and aren't peristed + # in our database yet. So we need to look for the events connected + # to the current backwards extremeties. + sql = """ + SELECT b.event_id, MAX(e.depth) FROM events as e + /** + * Get the edge connections from the event_edges table + * so we can see whether this event's prev_events points + * to a backward extremity in the next join. + */ + INNER JOIN event_edges as g + ON g.event_id = e.event_id + /** + * We find the "oldest" events in the room by looking for + * events connected to backwards extremeties (oldest events + * in the room that we know of so far). + */ + INNER JOIN event_backward_extremities as b + ON g.prev_event_id = b.event_id + WHERE b.room_id = ? AND g.is_state is ? + GROUP BY b.event_id + """ txn.execute(sql, (room_id, False)) @@ -687,7 +717,23 @@ def get_oldest_events_with_depth_in_room_txn(txn, room_id): room_id, ) - async def get_insertion_event_backwards_extremities_in_room(self, room_id): + async def get_insertion_event_backwards_extremities_in_room( + self, room_id + ) -> Dict[str, int]: + """Get the insertion events we know about that we haven't backfilled yet. + + We use this function so that we can compare and see if someones current + depth at their current scrollback is within pagination range of the + insertion event. If the current depth is close to the depth of given + insertion event, we can trigger a backfill. 
+ + Args: + room_id: Room where we want to find the insertion events + + Returns: + Map from event_id to depth + """ + def get_insertion_event_backwards_extremities_in_room_txn(txn, room_id): sql = """ SELECT b.event_id, MAX(e.depth) FROM insertion_events as i From 807217064a47fbbf09b47ee30ce5c6ef05d3cef6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Jul 2021 22:24:23 -0500 Subject: [PATCH 44/53] Rename to be more accurate to what the function returns --- synapse/handlers/federation.py | 2 +- synapse/storage/databases/main/event_federation.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5625c02ce210..d8b7366dbbf6 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1129,7 +1129,7 @@ async def _maybe_backfill_inner( self, room_id: str, current_depth: int, limit: int ) -> bool: oldest_events_with_depth = ( - await self.store.get_oldest_events_with_depth_in_room(room_id) + await self.store.get_oldest_event_ids_with_depth_in_room(room_id) ) insertion_events_to_be_backfilled = ( await self.store.get_insertion_event_backwards_extremities_in_room(room_id) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index d32422043129..273caa02400a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -665,7 +665,7 @@ def _get_auth_chain_difference_txn( # Return all events where not all sets can reach them. return {eid for eid, n in event_to_missing_sets.items() if n} - async def get_oldest_events_with_depth_in_room(self, room_id) -> Dict[str, int]: + async def get_oldest_event_ids_with_depth_in_room(self, room_id) -> Dict[str, int]: """Gets the oldest events in the room with depth. We use this function so that we can compare and see if someone's current @@ -680,7 +680,7 @@ async def get_oldest_events_with_depth_in_room(self, room_id) -> Dict[str, int]: Map from event_id to depth """ - def get_oldest_events_with_depth_in_room_txn(txn, room_id): + def get_oldest_event_ids_with_depth_in_room_txn(txn, room_id): @@ -712,8 +712,8 @@ def get_oldest_events_with_depth_in_room_txn(txn, room_id): return dict(txn) return await self.db_pool.runInteraction( - "get_oldest_events_with_depth_in_room", - get_oldest_events_with_depth_in_room_txn, + "get_oldest_event_ids_with_depth_in_room", + get_oldest_event_ids_with_depth_in_room_txn, room_id, ) From 44b883cd9f1ef678488a278286fc838410b17491 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Jul 2021 22:59:53 -0500 Subject: [PATCH 45/53] Add changelog --- changelog.d/10498.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10498.feature diff --git a/changelog.d/10498.feature b/changelog.d/10498.feature new file mode 100644 index 000000000000..5df896572d2f --- /dev/null +++ b/changelog.d/10498.feature @@ -0,0 +1 @@ +Add support for "marker" events which make historical events discoverable for servers that already have all of the scrollback history (part of MSC2716).
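Taken together, patches 43-45 describe a depth-comparison heuristic: backfill is only triggered once the user's scrollback depth comes within pagination range of a backwards extremity or an unfilled insertion event. The sketch below is not part of the patch series; it is a rough illustration of how the two storage functions renamed above could feed that decision, mirroring the `current_depth - 2 * limit > max_depth` check visible in `_maybe_backfill_inner`. The name `should_backfill` and the bare `store` parameter are assumed, illustrative names, and the real code does considerably more bookkeeping (sorting, filtering, and visibility checks) before requesting a backfill.

    # Illustrative sketch only -- simplified from the hunks above.
    async def should_backfill(store, room_id: str, current_depth: int, limit: int) -> bool:
        # Backwards extremities: the oldest events we know of, keyed by the
        # approximate depth of the events that reference them.
        oldest_events = await store.get_oldest_event_ids_with_depth_in_room(room_id)
        # MSC2716 insertion events that still need to be backfilled.
        insertion_events = await store.get_insertion_event_backwards_extremities_in_room(room_id)

        depths = list(oldest_events.values()) + list(insertion_events.values())
        if not depths:
            # No extremities, so nothing to backfill.
            return False

        max_depth = max(depths)
        # Skip backfilling until the user's scrollback position (current_depth)
        # comes within roughly one page (`limit`, with a fudge factor of 2 for
        # events sharing the same depth) of an extremity's depth.
        return current_depth - 2 * limit <= max_depth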
From 526874954668600e67a4a8dd3619f54870ad29c5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 3 Aug 2021 00:25:51 -0500 Subject: [PATCH 46/53] Ignore rejected events --- synapse/handlers/federation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index d8b7366dbbf6..1c60795611ca 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -906,6 +906,10 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): # Not a marker event return + if marker_event.rejected_reason is not None: + # Rejected event + return + # Skip processing a marker event if the room version doesn't # support it. room_version = await self.store.get_room_version(marker_event.room_id) From 71c2f054e93649a786daf3f9076add601fe01c81 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 3 Aug 2021 00:26:03 -0500 Subject: [PATCH 47/53] Use simplified upsert --- .../databases/main/event_federation.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 273caa02400a..9b8473e1b2ac 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1198,18 +1198,15 @@ def _delete_old_forward_extrem_cache_txn(txn): ) async def insert_insertion_extremity(self, event_id: str, room_id: str) -> None: - def _insert_insertion_extremity_txn(txn): - self.db_pool.simple_insert_txn( - txn, - table="insertion_event_extremities", - values={ - "event_id": event_id, - "room_id": room_id, - }, - ) - - await self.db_pool.runInteraction( - "_insert_insertion_extremity_txn", _insert_insertion_extremity_txn + await self.db_pool.simple_upsert( + table="insertion_event_extremities", + keyvalues={"event_id": event_id}, + values={ + "event_id": event_id, + "room_id": room_id, + }, + insertion_values={}, + desc="insert_insertion_extremity", ) From b832264005b3158ae94446ec3b432e820cc21933 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 3 Aug 2021 00:40:10 -0500 Subject: [PATCH 48/53] Add Erik's explanation of extra event checks See https://github.com/matrix-org/synapse/pull/10498#discussion_r680880332 --- synapse/handlers/federation.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 1c60795611ca..8b602e3813d4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -263,8 +263,12 @@ async def on_receive_pdu( state = None - # Get missing pdus if necessary. - # We don't need to worry about outliers because TODO! + # Check that the event passes auth based on the state at the event. This is + # done for events that are to be added to the timeline (non-outliers). + # + # Get missing pdus if necessary: + # - Fetching any missing prev events to fill in gaps in the graph + # - Fetching state if we have a hole in the graph if not pdu.internal_metadata.is_outlier(): # We only backfill backwards to the min depth. min_depth = await self.get_min_depth_for_context(pdu.room_id) @@ -434,6 +438,13 @@ async def on_receive_pdu( affected=event_id, ) + # A second round of checks for all events. Check that the event passes auth + # based on `auth_events`; this allows us to assert that the event would + # have been allowed at some point.
If an event passes this check it's OK + for it to be used as part of a returned `/state` request, as either + a) we received the event as part of the original join and so trust it, or + b) we'll do a state resolution with existing state before it becomes + part of the "current state", which adds more protection. await self._process_received_pdu(origin, pdu, state=state) async def _get_missing_events_for_pdu( From 1dc09962521f14fe2cb87e6d6fbc2172b1b8c718 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 3 Aug 2021 21:52:25 -0500 Subject: [PATCH 49/53] Clarify that the depth is not directly correlated to the backwards extremity that we return See https://github.com/matrix-org/synapse/pull/10498#discussion_r681725404 --- synapse/storage/databases/main/event_federation.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 9b8473e1b2ac..7471b67a974c 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -666,7 +666,8 @@ def _get_auth_chain_difference_txn( return {eid for eid, n in event_to_missing_sets.items() if n} async def get_oldest_event_ids_with_depth_in_room(self, room_id) -> Dict[str, int]: - """Gets the oldest events in the room with depth. + """Gets the oldest events (backwards extremities) in the room along with the + approximate depth. We use this function so that we can compare and see if someone's current depth at their current scrollback is within pagination range of the @@ -685,8 +686,9 @@ def get_oldest_event_ids_with_depth_in_room_txn(txn, room_id): # we know of in the room. Backwards extremities are the oldest # events we know of in the room but we only know of them because # some other event referenced them by prev_event and they aren't persisted - # in our database yet. So we need to look for the events connected - # to the current backwards extremities. + # in our database yet (meaning we don't know their depth + # specifically). So we need to look for the approximate depth from + # the events connected to the current backwards extremities. sql = """ SELECT b.event_id, MAX(e.depth) FROM events as e /** From 32af944efe396e027d667f996b9c49ab3fe84df2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 3 Aug 2021 22:07:34 -0500 Subject: [PATCH 50/53] lock only matters for sqlite See https://github.com/matrix-org/synapse/pull/10498#discussion_r681728061 --- synapse/storage/database.py | 16 ++++++++-------- .../storage/databases/main/event_federation.py | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 4d4643619f68..b2f7851c03af 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -919,14 +919,14 @@ async def simple_upsert( """ `lock` should generally be set to True (the default), but can be set - to False if either of the following are true: - - * there is a UNIQUE INDEX on the key columns. In this case a conflict - will cause an IntegrityError in which case this function will retry - the update. - - * we somehow know that we are the only thread which will be updating - this table. + to False if either of the following are true. + 1. there is a UNIQUE INDEX on the key columns. In this case a conflict + will cause an IntegrityError in which case this function will retry + the update. + 2. we somehow know that we are the only thread which will be updating + this table.
+ As an additional note, this parameter only matters for SQLite as we will + use native upserts otherwise. Args: table: The table to upsert into diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 7471b67a974c..d233e06fbc78 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1209,6 +1209,7 @@ async def insert_insertion_extremity(self, event_id: str, room_id: str) -> None: }, insertion_values={}, desc="insert_insertion_extremity", + lock=False, ) async def insert_received_event_to_staging( From 1b7e627c45d2375a6239d95231385c346800c184 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 3 Aug 2021 22:10:49 -0500 Subject: [PATCH 51/53] Move new SQL changes to their own delta file --- .../delta/61/01insertion_event_lookups.sql | 9 ------- .../62/01insertion_event_extremities.sql | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 synapse/storage/schema/main/delta/62/01insertion_event_extremities.sql diff --git a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql index 72f69da4bfa1..7d7bafc631ab 100644 --- a/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql +++ b/synapse/storage/schema/main/delta/61/01insertion_event_lookups.sql @@ -37,15 +37,6 @@ CREATE UNIQUE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_ev CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_room_id ON insertion_event_edges(room_id); CREATE INDEX IF NOT EXISTS insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id); --- Add a table that keeps track of which "insertion" events need to be backfilled -CREATE TABLE IF NOT EXISTS insertion_event_extremities( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL -); - -CREATE UNIQUE INDEX IF NOT EXISTS insertion_event_extremities_event_id ON insertion_event_extremities(event_id); -CREATE INDEX IF NOT EXISTS insertion_event_extremities_room_id ON insertion_event_extremities(room_id); - -- Add a table that keeps track of how each chunk is labeled. The chunks are -- connected together based on an insertion event's `next_chunk_id`. CREATE TABLE IF NOT EXISTS chunk_events( diff --git a/synapse/storage/schema/main/delta/62/01insertion_event_extremities.sql b/synapse/storage/schema/main/delta/62/01insertion_event_extremities.sql new file mode 100644 index 000000000000..b731ef284ac1 --- /dev/null +++ b/synapse/storage/schema/main/delta/62/01insertion_event_extremities.sql @@ -0,0 +1,24 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + + +-- Add a table that keeps track of which "insertion" events need to be backfilled +CREATE TABLE IF NOT EXISTS insertion_event_extremities( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX IF NOT EXISTS insertion_event_extremities_event_id ON insertion_event_extremities(event_id); +CREATE INDEX IF NOT EXISTS insertion_event_extremities_room_id ON insertion_event_extremities(room_id); From e6e48ed5d5352d5cd826d358cd229334bc104729 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 3 Aug 2021 22:14:07 -0500 Subject: [PATCH 52/53] Clean up upsert docstring --- synapse/storage/database.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index b2f7851c03af..f260bb4f2888 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -919,14 +919,14 @@ async def simple_upsert( """ `lock` should generally be set to True (the default), but can be set - to False if either of the following are true. + to False if either of the following are true: 1. there is a UNIQUE INDEX on the key columns. In this case a conflict will cause an IntegrityError in which case this function will retry the update. 2. we somehow know that we are the only thread which will be updating this table. - As an additional note, this parameter only matters for SQLite as we will - use native upserts otherwise. + As an additional note, this parameter only matters for old SQLite versions + because we will use native upserts otherwise. Args: table: The table to upsert into From 23bc5e5f8fc9d2be0cbb2be9b8dc1335a20472b4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 3 Aug 2021 22:16:38 -0500 Subject: [PATCH 53/53] Bump database schema version (62) --- synapse/storage/schema/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 36340a652aac..fd4dd67d910a 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 61 +SCHEMA_VERSION = 62 """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the