From 64448b373e83895e1d7a3b12bb7a801d240cb7fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 28 Sep 2021 22:00:04 -0500 Subject: [PATCH 01/21] Fix event context for outliers in important MSC2716 spot Fix event context for outlier causing failures in all of the MSC2716 Complement tests. The `EventContext.for_outlier` refactor happened in https://github.com/matrix-org/synapse/pull/10883 and this spot was left out. --- synapse/handlers/message.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 07aadf3f3c94..a8cd8d994386 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -953,18 +953,13 @@ async def create_new_client_event( depth=depth, ) - old_state = None + context = await self.state.compute_event_context(event) # Pass on the outlier property from the builder to the event # after it is created if builder.internal_metadata.outlier: event.internal_metadata.outlier = builder.internal_metadata.outlier - - # Calculate the state for outliers that pass in their own `auth_event_ids` - if auth_event_ids: - old_state = await self.store.get_events_as_list(auth_event_ids) - - context = await self.state.compute_event_context(event, old_state=old_state) + context = EventContext.for_outlier() if requester: context.app_service = requester.app_service From f3174cdd0d383f1a95978bf6fcf3b7fa19e2c3fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 28 Sep 2021 22:16:39 -0500 Subject: [PATCH 02/21] Add changelog --- changelog.d/10938.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10938.bugfix diff --git a/changelog.d/10938.bugfix b/changelog.d/10938.bugfix new file mode 100644 index 000000000000..afe6f2c6bf24 --- /dev/null +++ b/changelog.d/10938.bugfix @@ -0,0 +1 @@ +Fix [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint not being able to create outlier events. From 6713a2ac2de08419485bae5f833c7d23bee937d3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 29 Sep 2021 12:07:03 -0500 Subject: [PATCH 03/21] Update changelog.d/10938.bugfix Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/10938.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/10938.bugfix b/changelog.d/10938.bugfix index afe6f2c6bf24..9cf0ea8788b9 100644 --- a/changelog.d/10938.bugfix +++ b/changelog.d/10938.bugfix @@ -1 +1 @@ -Fix [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint not being able to create outlier events. +Fix bug introduced in Synapse 1.44 which caused the experimental [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint to return a 500 error. From 97fa9a29e48b5ca708aa49e016b6148e61506199 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 29 Sep 2021 12:07:09 -0500 Subject: [PATCH 04/21] Update synapse/handlers/message.py Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/handlers/message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a8cd8d994386..b06ea440af09 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -958,7 +958,7 @@ async def create_new_client_event( # Pass on the outlier property from the builder to the event # after it is created if builder.internal_metadata.outlier: - event.internal_metadata.outlier = builder.internal_metadata.outlier + event.internal_metadata.outlier = True context = EventContext.for_outlier() if requester: From fa4f20dfb7446065e26f5f11715106aacb205128 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 29 Sep 2021 12:09:12 -0500 Subject: [PATCH 05/21] Only generate context when we need to (it's not free to throw away) --- synapse/handlers/message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index b06ea440af09..d69eb32f4151 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -953,13 +953,13 @@ async def create_new_client_event( depth=depth, ) - context = await self.state.compute_event_context(event) - # Pass on the outlier property from the builder to the event # after it is created if builder.internal_metadata.outlier: event.internal_metadata.outlier = True context = EventContext.for_outlier() + else: + context = await self.state.compute_event_context(event) if requester: context.app_service = requester.app_service From 4fea37ed06516b1cff4838833578f796bb35e709 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 29 Sep 2021 18:51:08 -0500 Subject: [PATCH 06/21] Add proper state and state_groups so historical events return state from /context See https://gitlab.com/gitterHQ/webapp/-/merge_requests/2229#note_683532091 Also seems to fix https://github.com/matrix-org/synapse/issues/10764 --- synapse/handlers/message.py | 10 +++++++++- synapse/rest/client/room_batch.py | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d69eb32f4151..79799d774f90 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -959,7 +959,15 @@ async def create_new_client_event( event.internal_metadata.outlier = True context = EventContext.for_outlier() else: - context = await self.state.compute_event_context(event) + old_state = None + # Define the state for historical messages while we know to get all of + # state_groups setup properly when we `compute_event_context`. + if builder.internal_metadata.is_historical() and auth_event_ids: + old_state = await self.store.get_events_as_list(auth_event_ids) + + context = await self.state.compute_event_context(event, old_state=old_state) + + logger.info("create_new_client_event type=%s, event_id=%s context=%s", event.type, event.event_id, context) if requester: context.app_service = requester.app_service diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 1dffcc314793..ab5460fc065f 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -176,6 +176,8 @@ async def _create_requester_for_user_id_from_app_service( async def on_POST( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: + logger.info("room batch send =====================================================") + logger.info("=====================================================================") requester = await self.auth.get_user_by_req(request, allow_guest=False) if not requester.app_service: From 8fb4d6fffe384f4f2b3617ac54ea7451e284b791 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 29 Sep 2021 22:03:13 -0500 Subject: [PATCH 07/21] Force /context to return state for the given historical event --- synapse/handlers/message.py | 2 ++ synapse/handlers/room.py | 15 +++++++++++++-- synapse/rest/client/room.py | 1 + synapse/storage/state.py | 9 ++++++++- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 79799d774f90..f3dad1ab5c23 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -611,6 +611,7 @@ async def create_event( auth_event_ids=auth_event_ids, depth=depth, ) + #logger.info("auth_event_ids before=%s", auth_event_ids) auth_events = await self.store.get_events_as_list(auth_event_ids) # Create a StateMap[str] auth_event_state_map = { @@ -622,6 +623,7 @@ async def create_event( current_state_ids=auth_event_state_map, for_verification=False, ) + #logger.info("auth_event_ids after=%s", auth_event_ids) event, context = await self.create_new_client_event( builder=builder, diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 8fede5e935d0..ef556cd058ce 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1166,11 +1166,22 @@ async def filter_evts(events: List[EventBase]) -> List[EventBase]: # first? Shouldn't we be consistent with /sync? # https://github.com/matrix-org/matrix-doc/issues/687 + logger.info("get_event_context event_id=%s =====================", event_id) + + event_id_to_get_state_from = last_event_id + + # For historical events, we want to get the state at the specified event. + # TODO: maybe we can change how we're getting events_before and events_after + # here so it still works correctly without this hack + if event.internal_metadata.is_historical(): + event_id_to_get_state_from = event_id + state = await self.state_store.get_state_for_events( - [last_event_id], state_filter=state_filter + [event_id_to_get_state_from], state_filter=state_filter ) + logger.info("get_event_context event_id=%s state=%s", event_id, state) - state_events = list(state[last_event_id].values()) + state_events = list(state[event_id_to_get_state_from].values()) if event_filter: state_events = event_filter.filter(state_events) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index bf46dc60f262..a73b2f958274 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -695,6 +695,7 @@ async def on_GET( results = await self.room_context_handler.get_event_context( requester, room_id, event_id, limit, event_filter ) + logger.info("get /context event_id=%s results=%s", event_id, results) if not results: raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 5e86befde430..90ded4ef81c2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -491,7 +491,6 @@ async def get_state_for_events( [ev_id for sd in group_to_state.values() for ev_id in sd.values()], get_prev_content=False, ) - event_to_state = { event_id: { k: state_event_map[v] @@ -500,6 +499,14 @@ async def get_state_for_events( } for event_id, group in event_to_groups.items() } + logger.info( + "event_to_groups event_ids=%s event_to_groups=%s group_to_state=%s state_event_map=%s event_to_state=%s", + event_ids, + event_to_groups, + group_to_state, + state_event_map, + event_to_state + ) return {event: event_to_state[event] for event in event_ids} From 96d9d111ea584029ed3d7d5c1f40a725e81707e6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 29 Sep 2021 23:59:04 -0500 Subject: [PATCH 08/21] Set full state for each historical event like others --- synapse/handlers/message.py | 58 ++++++++++++++++++++----------------- synapse/handlers/room.py | 8 ++--- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f3dad1ab5c23..9e6a0e0415f7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -600,31 +600,6 @@ async def create_event( builder.internal_metadata.historical = historical - # Strip down the auth_event_ids to only what we need to auth the event. - # For example, we don't need extra m.room.member that don't match event.sender - if auth_event_ids is not None: - # If auth events are provided, prev events must be also. - assert prev_event_ids is not None - - temp_event = await builder.build( - prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, - depth=depth, - ) - #logger.info("auth_event_ids before=%s", auth_event_ids) - auth_events = await self.store.get_events_as_list(auth_event_ids) - # Create a StateMap[str] - auth_event_state_map = { - (e.type, e.state_key): e.event_id for e in auth_events - } - # Actually strip down and use the necessary auth events - auth_event_ids = self._event_auth_handler.compute_auth_events( - event=temp_event, - current_state_ids=auth_event_state_map, - for_verification=False, - ) - #logger.info("auth_event_ids after=%s", auth_event_ids) - event, context = await self.create_new_client_event( builder=builder, requester=requester, @@ -931,6 +906,34 @@ async def create_new_client_event( Tuple of created event, context """ + # Strip down the auth_event_ids to only what we need to auth the event. + # For example, we don't need extra m.room.member that don't match event.sender + if auth_event_ids is not None: + # If auth events are provided, prev events must be also. + assert prev_event_ids is not None + + # Copy the full auth state before it stripped down + full_state_ids_at_event = auth_event_ids.copy() + + temp_event = await builder.build( + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, + depth=depth, + ) + #logger.info("auth_event_ids before=%s", auth_event_ids) + auth_events = await self.store.get_events_as_list(auth_event_ids) + # Create a StateMap[str] + auth_event_state_map = { + (e.type, e.state_key): e.event_id for e in auth_events + } + # Actually strip down and use the necessary auth events + auth_event_ids = self._event_auth_handler.compute_auth_events( + event=temp_event, + current_state_ids=auth_event_state_map, + for_verification=False, + ) + #logger.info("auth_event_ids after=%s", auth_event_ids) + if prev_event_ids is not None: assert ( len(prev_event_ids) <= 10 @@ -940,6 +943,7 @@ async def create_new_client_event( else: prev_event_ids = await self.store.get_prev_events_for_room(builder.room_id) + # we now ought to have some prev_events (unless it's a create event). # # do a quick sanity check here, rather than waiting until we've created the @@ -964,8 +968,8 @@ async def create_new_client_event( old_state = None # Define the state for historical messages while we know to get all of # state_groups setup properly when we `compute_event_context`. - if builder.internal_metadata.is_historical() and auth_event_ids: - old_state = await self.store.get_events_as_list(auth_event_ids) + if builder.internal_metadata.is_historical() and full_state_ids_at_event: + old_state = await self.store.get_events_as_list(full_state_ids_at_event) context = await self.state.compute_event_context(event, old_state=old_state) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index ef556cd058ce..8b6c96c1b76b 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1162,19 +1162,17 @@ async def filter_evts(events: List[EventBase]) -> List[EventBase]: else: state_filter = StateFilter.all() + logger.info("get_event_context event_id=%s =====================", event_id) # XXX: why do we return the state as of the last event rather than the # first? Shouldn't we be consistent with /sync? # https://github.com/matrix-org/matrix-doc/issues/687 - - logger.info("get_event_context event_id=%s =====================", event_id) - event_id_to_get_state_from = last_event_id # For historical events, we want to get the state at the specified event. # TODO: maybe we can change how we're getting events_before and events_after # here so it still works correctly without this hack - if event.internal_metadata.is_historical(): - event_id_to_get_state_from = event_id + # if event.internal_metadata.is_historical(): + # event_id_to_get_state_from = event_id state = await self.state_store.get_state_for_events( [event_id_to_get_state_from], state_filter=state_filter From b20fd16c5611c260284800aa5d9ef6d6eedc20e3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 30 Sep 2021 01:44:44 -0500 Subject: [PATCH 09/21] Fix boolean logic for testing whether msc2716 is enabled --- synapse/handlers/federation_event.py | 4 ++-- synapse/rest/client/room.py | 3 +++ synapse/rest/client/room_batch.py | 1 + synapse/storage/databases/main/events.py | 21 ++++++++++++++++----- synapse/storage/state.py | 24 ++++++++++++++++-------- 5 files changed, 38 insertions(+), 15 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 2c4644b4a32d..a2932fb9fc56 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1009,8 +1009,8 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> No room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) if ( not room_version.msc2716_historical - or not self._config.experimental.msc2716_enabled - or marker_event.sender != room_creator + and (not self._config.experimental.msc2716_enabled + or marker_event.sender != room_creator) ): return diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index a73b2f958274..2cfd935df02d 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -557,6 +557,9 @@ def __init__(self, hs: "HomeServer"): async def on_GET( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: + logger.info("room /messages =====================================================") + logger.info("====================================================================") + requester = await self.auth.get_user_by_req(request, allow_guest=True) pagination_config = await PaginationConfig.from_request( self.store, request, default_limit=10 diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index ab5460fc065f..789fe75acc4d 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -307,6 +307,7 @@ async def on_POST( # Verify the batch_id_from_query corresponds to an actual insertion event # and have the batch connected. + logger.info("get_insertion_event_by_batch_id room_id=%room_id batch_id_from_query=%s", room_id, batch_id_from_query) corresponding_insertion_event_id = ( await self.store.get_insertion_event_by_batch_id( room_id, batch_id_from_query diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cc4e31ec3011..d8bfdef9a4d5 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1753,6 +1753,10 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Not a insertion event return + logger.info( + "_handle_insertion_event at the start %s", event + ) + # Skip processing an insertion event if the room version doesn't # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) @@ -1763,10 +1767,17 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): retcol="creator", allow_none=True, ) + logger.info( + "_handle_insertion_event at the start room_version.msc2716_historical=%s self.hs.config.experimental.msc2716_enabled=%s event.sender=%s room_creator=%s", + room_version.msc2716_historical, + self.hs.config.experimental.msc2716_enabled, + event.sender, + room_creator + ) if ( not room_version.msc2716_historical - or not self.hs.config.experimental.msc2716_enabled - or event.sender != room_creator + and (not self.hs.config.experimental.msc2716_enabled + or event.sender != room_creator) ): return @@ -1775,7 +1786,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Invalid insertion event without next batch ID return - logger.debug( + logger.info( "_handle_insertion_event (next_batch_id=%s) %s", next_batch_id, event ) @@ -1827,8 +1838,8 @@ def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase): ) if ( not room_version.msc2716_historical - or not self.hs.config.experimental.msc2716_enabled - or event.sender != room_creator + and (not self.hs.config.experimental.msc2716_enabled + or event.sender != room_creator) ): return diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 90ded4ef81c2..aea3abbb1b87 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -499,14 +499,14 @@ async def get_state_for_events( } for event_id, group in event_to_groups.items() } - logger.info( - "event_to_groups event_ids=%s event_to_groups=%s group_to_state=%s state_event_map=%s event_to_state=%s", - event_ids, - event_to_groups, - group_to_state, - state_event_map, - event_to_state - ) + # logger.info( + # "event_to_groups event_ids=%s event_to_groups=%s group_to_state=%s state_event_map=%s event_to_state=%s", + # event_ids, + # event_to_groups, + # group_to_state, + # state_event_map, + # event_to_state + # ) return {event: event_to_state[event] for event in event_ids} @@ -536,6 +536,14 @@ async def get_state_ids_for_events( for event_id, group in event_to_groups.items() } + logger.info( + "get_state_ids_for_events event_ids=%s event_to_groups=%s group_to_state=%s event_to_state=%s", + event_ids, + event_to_groups, + group_to_state, + event_to_state + ) + return {event: event_to_state[event] for event in event_ids} async def get_state_for_event( From cafb1dcb6e64169d487e4008fe7c817ef6a1c6d2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 1 Oct 2021 01:41:13 -0500 Subject: [PATCH 10/21] Remove debug logs --- changelog.d/10938.bugfix | 1 - synapse/handlers/federation_event.py | 7 +++--- synapse/handlers/message.py | 5 ----- synapse/handlers/room.py | 8 ------- synapse/rest/client/room.py | 4 ---- synapse/rest/client/room_batch.py | 3 --- synapse/storage/databases/main/events.py | 27 ++++++------------------ synapse/storage/state.py | 17 +-------------- 8 files changed, 11 insertions(+), 61 deletions(-) delete mode 100644 changelog.d/10938.bugfix diff --git a/changelog.d/10938.bugfix b/changelog.d/10938.bugfix deleted file mode 100644 index 9cf0ea8788b9..000000000000 --- a/changelog.d/10938.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug introduced in Synapse 1.44 which caused the experimental [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint to return a 500 error. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 51741b785423..95c125ed8a98 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1010,10 +1010,9 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> No room_version = await self._store.get_room_version(marker_event.room_id) create_event = await self._store.get_create_event_for_room(marker_event.room_id) room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) - if ( - not room_version.msc2716_historical - and (not self._config.experimental.msc2716_enabled - or marker_event.sender != room_creator) + if not room_version.msc2716_historical and ( + not self._config.experimental.msc2716_enabled + or marker_event.sender != room_creator ): return diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 9ef010545d46..3651f097b9c1 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -930,7 +930,6 @@ async def create_new_client_event( auth_event_ids=auth_event_ids, depth=depth, ) - #logger.info("auth_event_ids before=%s", auth_event_ids) auth_events = await self.store.get_events_as_list(auth_event_ids) # Create a StateMap[str] auth_event_state_map = { @@ -942,7 +941,6 @@ async def create_new_client_event( current_state_ids=auth_event_state_map, for_verification=False, ) - #logger.info("auth_event_ids after=%s", auth_event_ids) if prev_event_ids is not None: assert ( @@ -953,7 +951,6 @@ async def create_new_client_event( else: prev_event_ids = await self.store.get_prev_events_for_room(builder.room_id) - # we now ought to have some prev_events (unless it's a create event). # # do a quick sanity check here, rather than waiting until we've created the @@ -983,8 +980,6 @@ async def create_new_client_event( context = await self.state.compute_event_context(event, old_state=old_state) - logger.info("create_new_client_event type=%s, event_id=%s context=%s", event.type, event.event_id, context) - if requester: context.app_service = requester.app_service diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 449fa2c7257c..de8e6e7c11f8 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1164,22 +1164,14 @@ async def filter_evts(events: List[EventBase]) -> List[EventBase]: else: state_filter = StateFilter.all() - logger.info("get_event_context event_id=%s =====================", event_id) # XXX: why do we return the state as of the last event rather than the # first? Shouldn't we be consistent with /sync? # https://github.com/matrix-org/matrix-doc/issues/687 event_id_to_get_state_from = last_event_id - # For historical events, we want to get the state at the specified event. - # TODO: maybe we can change how we're getting events_before and events_after - # here so it still works correctly without this hack - # if event.internal_metadata.is_historical(): - # event_id_to_get_state_from = event_id - state = await self.state_store.get_state_for_events( [event_id_to_get_state_from], state_filter=state_filter ) - logger.info("get_event_context event_id=%s state=%s", event_id, state) state_events = list(state[event_id_to_get_state_from].values()) if event_filter: diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index c8240a956e32..ed95189b6d8b 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -557,9 +557,6 @@ def __init__(self, hs: "HomeServer"): async def on_GET( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: - logger.info("room /messages =====================================================") - logger.info("====================================================================") - requester = await self.auth.get_user_by_req(request, allow_guest=True) pagination_config = await PaginationConfig.from_request( self.store, request, default_limit=10 @@ -698,7 +695,6 @@ async def on_GET( results = await self.room_context_handler.get_event_context( requester, room_id, event_id, limit, event_filter ) - logger.info("get /context event_id=%s results=%s", event_id, results) if not results: raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 789fe75acc4d..1dffcc314793 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -176,8 +176,6 @@ async def _create_requester_for_user_id_from_app_service( async def on_POST( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: - logger.info("room batch send =====================================================") - logger.info("=====================================================================") requester = await self.auth.get_user_by_req(request, allow_guest=False) if not requester.app_service: @@ -307,7 +305,6 @@ async def on_POST( # Verify the batch_id_from_query corresponds to an actual insertion event # and have the batch connected. - logger.info("get_insertion_event_by_batch_id room_id=%room_id batch_id_from_query=%s", room_id, batch_id_from_query) corresponding_insertion_event_id = ( await self.store.get_insertion_event_by_batch_id( room_id, batch_id_from_query diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b54423028991..19f55c19c5bf 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1753,10 +1753,6 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Not a insertion event return - logger.info( - "_handle_insertion_event at the start %s", event - ) - # Skip processing an insertion event if the room version doesn't # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) @@ -1767,17 +1763,9 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): retcol="creator", allow_none=True, ) - logger.info( - "_handle_insertion_event at the start room_version.msc2716_historical=%s self.hs.config.experimental.msc2716_enabled=%s event.sender=%s room_creator=%s", - room_version.msc2716_historical, - self.hs.config.experimental.msc2716_enabled, - event.sender, - room_creator - ) - if ( - not room_version.msc2716_historical - and (not self.hs.config.experimental.msc2716_enabled - or event.sender != room_creator) + if not room_version.msc2716_historical and ( + not self.hs.config.experimental.msc2716_enabled + or event.sender != room_creator ): return @@ -1786,7 +1774,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Invalid insertion event without next batch ID return - logger.info( + logger.debug( "_handle_insertion_event (next_batch_id=%s) %s", next_batch_id, event ) @@ -1836,10 +1824,9 @@ def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase): retcol="creator", allow_none=True, ) - if ( - not room_version.msc2716_historical - and (not self.hs.config.experimental.msc2716_enabled - or event.sender != room_creator) + if not room_version.msc2716_historical and ( + not self.hs.config.experimental.msc2716_enabled + or event.sender != room_creator ): return diff --git a/synapse/storage/state.py b/synapse/storage/state.py index aea3abbb1b87..5e86befde430 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -491,6 +491,7 @@ async def get_state_for_events( [ev_id for sd in group_to_state.values() for ev_id in sd.values()], get_prev_content=False, ) + event_to_state = { event_id: { k: state_event_map[v] @@ -499,14 +500,6 @@ async def get_state_for_events( } for event_id, group in event_to_groups.items() } - # logger.info( - # "event_to_groups event_ids=%s event_to_groups=%s group_to_state=%s state_event_map=%s event_to_state=%s", - # event_ids, - # event_to_groups, - # group_to_state, - # state_event_map, - # event_to_state - # ) return {event: event_to_state[event] for event in event_ids} @@ -536,14 +529,6 @@ async def get_state_ids_for_events( for event_id, group in event_to_groups.items() } - logger.info( - "get_state_ids_for_events event_ids=%s event_to_groups=%s group_to_state=%s event_to_state=%s", - event_ids, - event_to_groups, - group_to_state, - event_to_state - ) - return {event: event_to_state[event] for event in event_ids} async def get_state_for_event( From 487754f4e16a49f0abf63adbf2f0011a73870e07 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 1 Oct 2021 01:56:23 -0500 Subject: [PATCH 11/21] Restore back to what was before --- synapse/handlers/room.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index de8e6e7c11f8..873e08258ea0 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1167,13 +1167,12 @@ async def filter_evts(events: List[EventBase]) -> List[EventBase]: # XXX: why do we return the state as of the last event rather than the # first? Shouldn't we be consistent with /sync? # https://github.com/matrix-org/matrix-doc/issues/687 - event_id_to_get_state_from = last_event_id state = await self.state_store.get_state_for_events( - [event_id_to_get_state_from], state_filter=state_filter + [last_event_id], state_filter=state_filter ) - state_events = list(state[event_id_to_get_state_from].values()) + state_events = list(state[last_event_id].values()) if event_filter: state_events = event_filter.filter(state_events) From 43f13289b32fb3f016aafcbce5bfe36c64c5e404 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 1 Oct 2021 23:39:07 -0500 Subject: [PATCH 12/21] Resolve and share state_groups for all historical events in batch --- synapse/handlers/message.py | 11 +++- synapse/rest/client/room_batch.py | 52 ++++++++++++------- synapse/state/__init__.py | 12 +++++ synapse/storage/databases/main/events.py | 13 +++-- synapse/storage/databases/main/room_batch.py | 13 +++++ synapse/storage/schema/__init__.py | 6 ++- .../65/01msc2716_insertion_event_edges.sql | 17 ++++++ 7 files changed, 100 insertions(+), 24 deletions(-) create mode 100644 synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3651f097b9c1..6ac0f1acd1d0 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -617,6 +617,12 @@ async def create_event( auth_event_ids=auth_event_ids, depth=depth, ) + logger.info( + "create_new_client_event %s event=%s state_group=%s", + event.type, + event.event_id, + context._state_group, + ) # In an ideal world we wouldn't need the second part of this condition. However, # this behaviour isn't spec'd yet, meaning we should be able to deactivate this @@ -978,7 +984,10 @@ async def create_new_client_event( if builder.internal_metadata.is_historical() and full_state_ids_at_event: old_state = await self.store.get_events_as_list(full_state_ids_at_event) - context = await self.state.compute_event_context(event, old_state=old_state) + context = await self.state.compute_event_context( + event, + # old_state=old_state + ) if requester: context.app_service = requester.app_service diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 1dffcc314793..fc2bfc8b9745 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -41,6 +41,10 @@ logger = logging.getLogger(__name__) +def generate_fake_prev_event_id(): + return "$" + random_string(43) + + class RoomBatchSendEventRestServlet(RestServlet): """ API endpoint which can insert a batch of events historically back in time @@ -216,6 +220,10 @@ async def on_POST( prev_state_ids = list(prev_state_map.values()) auth_event_ids = prev_state_ids + # Make the state events float off on their own + + prev_event_id_for_state_chain = generate_fake_prev_event_id() + state_event_ids_at_start = [] for state_event in body["state_events_at_start"]: assert_params_in_dict( @@ -240,9 +248,6 @@ async def on_POST( # Mark all events as historical event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True - # Make the state events float off on their own - fake_prev_event_id = "$" + random_string(43) - # TODO: This is pretty much the same as some other code to handle inserting state in this file if event_dict["type"] == EventTypes.Member: membership = event_dict["content"].get("membership", None) @@ -254,8 +259,8 @@ async def on_POST( room_id=room_id, action=membership, content=event_dict["content"], - outlier=True, - prev_event_ids=[fake_prev_event_id], + # outlier=True, + prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. @@ -274,7 +279,7 @@ async def on_POST( ), event_dict, outlier=True, - prev_event_ids=[fake_prev_event_id], + prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. @@ -284,6 +289,8 @@ async def on_POST( state_event_ids_at_start.append(event_id) auth_event_ids.append(event_id) + # Connect all the state in a floating chain + prev_event_id_for_state_chain = event_id events_to_create = body["events"] @@ -298,11 +305,6 @@ async def on_POST( batch_id_to_connect_to = batch_id_from_query base_insertion_event = None if batch_id_from_query: - # All but the first base insertion event should point at a fake - # event, which causes the HS to ask for the state at the start of - # the batch later. - prev_event_ids = [fake_prev_event_id] - # Verify the batch_id_from_query corresponds to an actual insertion event # and have the batch connected. corresponding_insertion_event_id = ( @@ -325,14 +327,12 @@ async def on_POST( # an insertion event), in which case we just create a new insertion event # that can then get pointed to by a "marker" event later. else: - prev_event_ids = prev_event_ids_from_query - base_insertion_event_dict = self._create_insertion_event_dict( sender=requester.user.to_string(), room_id=room_id, origin_server_ts=last_event_in_batch["origin_server_ts"], ) - base_insertion_event_dict["prev_events"] = prev_event_ids.copy() + base_insertion_event_dict["prev_events"] = prev_event_ids_from_query.copy() ( base_insertion_event, @@ -383,11 +383,20 @@ async def on_POST( # Prepend the insertion event to the start of the batch (oldest-in-time) events_to_create = [insertion_event] + events_to_create + # Also connect the historical event chain to floating state chain, + # which causes the HS to ask for the state at the start of + # the batch later. + prev_event_ids = [prev_event_id_for_state_chain] + event_ids = [] events_to_persist = [] for ev in events_to_create: assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) + assert self.hs.is_mine_id(ev["sender"]), "User must be our own: %s" % ( + event.sender, + ) + event_dict = { "type": ev["type"], "origin_server_ts": ev["origin_server_ts"], @@ -410,6 +419,17 @@ async def on_POST( historical=True, depth=inherited_depth, ) + + # Normally this is done when persisting the event but we have to + # pre-emptively do it here because we create all the events first, + # then persist them in another pass below. And we want to share + # state_groups across the whole batch so this lookup needs to work + # for the next event in the batch in this loop. + await self.store.store_state_group_id_for_event_id( + event_id=event.event_id, + state_group_id=context._state_group, + ) + logger.debug( "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", event, @@ -417,10 +437,6 @@ async def on_POST( auth_event_ids, ) - assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( - event.sender, - ) - events_to_persist.append((event, context)) event_id = event.event_id diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index c981df3f18b3..62a1395c8b73 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -303,6 +303,15 @@ async def compute_event_context( entry = await self.resolve_state_groups_for_events( event.room_id, event.prev_event_ids() ) + logger.info( + "compute_event_context %s event=%s (event.prev_event_ids=%s) entry.state_group=%s entry.prev_group=%s entry.delta_ids=%s", + event.type, + event.event_id, + event.prev_event_ids(), + entry.state_group, + entry.prev_group, + entry.delta_ids, + ) state_ids_before_event = entry.state state_group_before_event = entry.state_group @@ -400,6 +409,9 @@ async def resolve_state_groups_for_events( state_groups_ids = await self.state_store.get_state_groups_ids( room_id, event_ids ) + logger.info( + "resolve_state_groups_for_events state_groups_ids=%s", state_groups_ids + ) if len(state_groups_ids) == 0: return _StateCacheEntry(state={}, state_group=None) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 19f55c19c5bf..0b5c253b7382 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2069,13 +2069,18 @@ def _store_event_state_mappings_txn( state_groups[event.event_id] = context.state_group - self.db_pool.simple_insert_many_txn( + self.db_pool.simple_upsert_many_txn( txn, table="event_to_state_groups", - values=[ - {"state_group": state_group_id, "event_id": event_id} + key_names=("event_id",), + key_values=( + (event_id,) for event_id, state_group_id in state_groups.items() + ), + value_names=("state_group", "event_id"), + value_values=( + (state_group_id, event_id) for event_id, state_group_id in state_groups.items() - ], + ), ) for event_id, state_group_id in state_groups.items(): diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index 300a563c9e09..dcbce8fdcf03 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -36,3 +36,16 @@ async def get_insertion_event_by_batch_id( retcol="event_id", allow_none=True, ) + + async def store_state_group_id_for_event_id( + self, event_id: str, state_group_id: int + ) -> Optional[str]: + { + await self.db_pool.simple_upsert( + table="event_to_state_groups", + keyvalues={"event_id": event_id}, + values={"state_group": state_group_id, "event_id": event_id}, + # Unique constraint on event_id so we don't have to lock + lock=False, + ) + } diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 1aee741a8bd6..a1d233232685 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 64 # remember to update the list below when updating +SCHEMA_VERSION = 65 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -41,6 +41,10 @@ Changes in SCHEMA_VERSION = 64: - MSC2716: Rename related tables and columns from "chunks" to "batches". + +Changes in SCHEMA_VERSION = 65: + - MSC2716: Remove unique event_id constraint from insertion_event_edges + because an insertion event can have multiple edges. """ diff --git a/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql new file mode 100644 index 000000000000..7fbc458465c5 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql @@ -0,0 +1,17 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +DROP INDEX insertion_event_edges_event_id; +CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id); From d4946731926e1453273c23d7618ad0b73b983ff4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 1 Oct 2021 23:47:55 -0500 Subject: [PATCH 13/21] Add sql comment --- .../schema/main/delta/65/01msc2716_insertion_event_edges.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql index 7fbc458465c5..9254fd9d2c15 100644 --- a/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql +++ b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql @@ -13,5 +13,7 @@ * limitations under the License. */ +-- Recreate the insertion_event_edges event_id index without the unique constraint +-- because an insetion event can have multiple edges. DROP INDEX insertion_event_edges_event_id; CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id); From 6005c463d51bd684b93f975f454801371f0a7672 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 2 Oct 2021 00:28:47 -0500 Subject: [PATCH 14/21] Remove unused code and fix lint See https://github.com/matrix-org/synapse/pull/10975#discussion_r720625609 --- synapse/handlers/message.py | 61 ++++++++++-------------- synapse/rest/client/room_batch.py | 4 +- synapse/storage/databases/main/events.py | 12 ++--- 3 files changed, 32 insertions(+), 45 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6ac0f1acd1d0..d70cbf0f835f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -610,6 +610,29 @@ async def create_event( builder.internal_metadata.historical = historical + # Strip down the auth_event_ids to only what we need to auth the event. + # For example, we don't need extra m.room.member that don't match event.sender + if auth_event_ids is not None: + # If auth events are provided, prev events must be also. + assert prev_event_ids is not None + + temp_event = await builder.build( + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, + depth=depth, + ) + auth_events = await self.store.get_events_as_list(auth_event_ids) + # Create a StateMap[str] + auth_event_state_map = { + (e.type, e.state_key): e.event_id for e in auth_events + } + # Actually strip down and use the necessary auth events + auth_event_ids = self._event_auth_handler.compute_auth_events( + event=temp_event, + current_state_ids=auth_event_state_map, + for_verification=False, + ) + event, context = await self.create_new_client_event( builder=builder, requester=requester, @@ -921,33 +944,6 @@ async def create_new_client_event( Returns: Tuple of created event, context """ - - # Strip down the auth_event_ids to only what we need to auth the event. - # For example, we don't need extra m.room.member that don't match event.sender - if auth_event_ids is not None: - # If auth events are provided, prev events must be also. - assert prev_event_ids is not None - - # Copy the full auth state before it stripped down - full_state_ids_at_event = auth_event_ids.copy() - - temp_event = await builder.build( - prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, - depth=depth, - ) - auth_events = await self.store.get_events_as_list(auth_event_ids) - # Create a StateMap[str] - auth_event_state_map = { - (e.type, e.state_key): e.event_id for e in auth_events - } - # Actually strip down and use the necessary auth events - auth_event_ids = self._event_auth_handler.compute_auth_events( - event=temp_event, - current_state_ids=auth_event_state_map, - for_verification=False, - ) - if prev_event_ids is not None: assert ( len(prev_event_ids) <= 10 @@ -978,16 +974,7 @@ async def create_new_client_event( event.internal_metadata.outlier = True context = EventContext.for_outlier() else: - old_state = None - # Define the state for historical messages while we know to get all of - # state_groups setup properly when we `compute_event_context`. - if builder.internal_metadata.is_historical() and full_state_ids_at_event: - old_state = await self.store.get_events_as_list(full_state_ids_at_event) - - context = await self.state.compute_event_context( - event, - # old_state=old_state - ) + context = await self.state.compute_event_context(event) if requester: context.app_service = requester.app_service diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index fc2bfc8b9745..ee4e0b9b424a 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -41,7 +41,7 @@ logger = logging.getLogger(__name__) -def generate_fake_prev_event_id(): +def generate_fake_prev_event_id() -> str: return "$" + random_string(43) @@ -420,6 +420,8 @@ async def on_POST( depth=inherited_depth, ) + assert context._state_group + # Normally this is done when persisting the event but we have to # pre-emptively do it here because we create all the events first, # then persist them in another pass below. And we want to share diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 0b5c253b7382..9cc5b44ce5fa 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2072,15 +2072,13 @@ def _store_event_state_mappings_txn( self.db_pool.simple_upsert_many_txn( txn, table="event_to_state_groups", - key_names=("event_id",), - key_values=( - (event_id,) for event_id, state_group_id in state_groups.items() - ), - value_names=("state_group", "event_id"), - value_values=( + key_names=["event_id"], + key_values=[(event_id,) for event_id, _ in state_groups.items()], + value_names=["state_group", "event_id"], + value_values=[ (state_group_id, event_id) for event_id, state_group_id in state_groups.items() - ), + ], ) for event_id, state_group_id in state_groups.items(): From 1227154b62a3b014c86819c7feb12b42065bf8f0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 2 Oct 2021 00:31:05 -0500 Subject: [PATCH 15/21] Add changelog --- changelog.d/10975.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10975.feature diff --git a/changelog.d/10975.feature b/changelog.d/10975.feature new file mode 100644 index 000000000000..167426e1fcbd --- /dev/null +++ b/changelog.d/10975.feature @@ -0,0 +1 @@ +Resolve and share `state_groups` for all [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical events in batch. From 10c91ee1aa396d6026018f33c753b1a1701d6d07 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 2 Oct 2021 01:23:23 -0500 Subject: [PATCH 16/21] Fix upsert many being weird with combining key and value columns which duplicate each other Error when running with Postgres: ``` psycopg2.errors.DuplicateColumn: column "event_id" specified more than once LINE 1: ...NTO event_to_state_groups (event_id, state_group, event_id) ... ``` SQL before: ``` INSERT INTO event_to_state_groups (event_id, state_group, event_id) VALUES ? ON CONFLICT (event_id) DO UPDATE SET state_group=EXCLUDED.state_group, event_id=EXCLUDED.event_id args=[('$2Smpvyp_NC1EQBranDEce7AkqCcV7pCkeSE5mYrEMpA', 2, '$2Smpvyp_NC1EQBranDEce7AkqCcV7pCkeSE5mYrEMpA')] ``` SQL after: ``` INSERT INTO event_to_state_groups (event_id, state_group) VALUES ? ON CONFLICT (event_id) DO UPDATE SET state_group=EXCLUDED.state_group args=[('$jfJYRPWil-HfZFRFC_5biWjYzQJ6FRpgWBO7MV-4vds', 3)] ``` --- synapse/storage/databases/main/events.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 9cc5b44ce5fa..37439f85628e 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2073,11 +2073,10 @@ def _store_event_state_mappings_txn( txn, table="event_to_state_groups", key_names=["event_id"], - key_values=[(event_id,) for event_id, _ in state_groups.items()], - value_names=["state_group", "event_id"], + key_values=[[event_id] for event_id, _ in state_groups.items()], + value_names=["state_group"], value_values=[ - (state_group_id, event_id) - for event_id, state_group_id in state_groups.items() + [state_group_id] for _, state_group_id in state_groups.items() ], ) From d0d66990d166a929ed8a1fc17662381454615e78 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 2 Oct 2021 02:04:26 -0500 Subject: [PATCH 17/21] Add findings when testing with Element --- synapse/rest/client/room_batch.py | 4 ++++ synapse/state/__init__.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index ee4e0b9b424a..cfd2d0727d83 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -259,6 +259,10 @@ async def on_POST( room_id=room_id, action=membership, content=event_dict["content"], + # TODO: I think making this a non-outlier makes the state + # resolve into the current state and shows a bunch of noice + # in the room. In reality, we just want the state_group + # created to share with the rest of the batch. # outlier=True, prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 62a1395c8b73..6c0bf5114389 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -304,13 +304,14 @@ async def compute_event_context( event.room_id, event.prev_event_ids() ) logger.info( - "compute_event_context %s event=%s (event.prev_event_ids=%s) entry.state_group=%s entry.prev_group=%s entry.delta_ids=%s", + "compute_event_context %s event=%s (event.prev_event_ids=%s) entry.state_group=%s entry.prev_group=%s entry.delta_ids=%s entry.state=%s", event.type, event.event_id, event.prev_event_ids(), entry.state_group, entry.prev_group, entry.delta_ids, + entry.state, ) state_ids_before_event = entry.state From aa2e56e62460a0457050505969d1c1fb7e3523e7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 4 Oct 2021 22:33:29 -0500 Subject: [PATCH 18/21] Connect the state to the insertion event which is inherited by the rest of the batch --- synapse/handlers/message.py | 57 ++++++++++++++++++------------- synapse/rest/client/room_batch.py | 2 +- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d70cbf0f835f..f29df4a3c167 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -610,29 +610,6 @@ async def create_event( builder.internal_metadata.historical = historical - # Strip down the auth_event_ids to only what we need to auth the event. - # For example, we don't need extra m.room.member that don't match event.sender - if auth_event_ids is not None: - # If auth events are provided, prev events must be also. - assert prev_event_ids is not None - - temp_event = await builder.build( - prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, - depth=depth, - ) - auth_events = await self.store.get_events_as_list(auth_event_ids) - # Create a StateMap[str] - auth_event_state_map = { - (e.type, e.state_key): e.event_id for e in auth_events - } - # Actually strip down and use the necessary auth events - auth_event_ids = self._event_auth_handler.compute_auth_events( - event=temp_event, - current_state_ids=auth_event_state_map, - for_verification=False, - ) - event, context = await self.create_new_client_event( builder=builder, requester=requester, @@ -944,6 +921,33 @@ async def create_new_client_event( Returns: Tuple of created event, context """ + + # Strip down the auth_event_ids to only what we need to auth the event. + # For example, we don't need extra m.room.member that don't match event.sender + if auth_event_ids is not None: + # If auth events are provided, prev events must be also. + assert prev_event_ids is not None + + # Copy the full auth state before it stripped down + full_state_ids_at_event = auth_event_ids.copy() + + temp_event = await builder.build( + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, + depth=depth, + ) + auth_events = await self.store.get_events_as_list(auth_event_ids) + # Create a StateMap[str] + auth_event_state_map = { + (e.type, e.state_key): e.event_id for e in auth_events + } + # Actually strip down and use the necessary auth events + auth_event_ids = self._event_auth_handler.compute_auth_events( + event=temp_event, + current_state_ids=auth_event_state_map, + for_verification=False, + ) + if prev_event_ids is not None: assert ( len(prev_event_ids) <= 10 @@ -973,6 +977,13 @@ async def create_new_client_event( if builder.internal_metadata.outlier: event.internal_metadata.outlier = True context = EventContext.for_outlier() + elif ( + event.type == EventTypes.MSC2716_INSERTION + and full_state_ids_at_event + and builder.internal_metadata.is_historical() + ): + old_state = await self.store.get_events_as_list(full_state_ids_at_event) + context = await self.state.compute_event_context(event, old_state=old_state) else: context = await self.state.compute_event_context(event) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index cfd2d0727d83..f4468788bca8 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -263,7 +263,7 @@ async def on_POST( # resolve into the current state and shows a bunch of noice # in the room. In reality, we just want the state_group # created to share with the rest of the batch. - # outlier=True, + outlier=True, prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same From 3b085ab7de1f5f36427dc3c925038d6c17154522 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 4 Oct 2021 22:49:35 -0500 Subject: [PATCH 19/21] Remove debug logging --- synapse/handlers/message.py | 6 ------ synapse/rest/client/room_batch.py | 4 ---- synapse/state/__init__.py | 13 ------------- 3 files changed, 23 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f29df4a3c167..08c0bd08df12 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -617,12 +617,6 @@ async def create_event( auth_event_ids=auth_event_ids, depth=depth, ) - logger.info( - "create_new_client_event %s event=%s state_group=%s", - event.type, - event.event_id, - context._state_group, - ) # In an ideal world we wouldn't need the second part of this condition. However, # this behaviour isn't spec'd yet, meaning we should be able to deactivate this diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index f4468788bca8..35c2ff9fffaf 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -259,10 +259,6 @@ async def on_POST( room_id=room_id, action=membership, content=event_dict["content"], - # TODO: I think making this a non-outlier makes the state - # resolve into the current state and shows a bunch of noice - # in the room. In reality, we just want the state_group - # created to share with the rest of the batch. outlier=True, prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 6c0bf5114389..c981df3f18b3 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -303,16 +303,6 @@ async def compute_event_context( entry = await self.resolve_state_groups_for_events( event.room_id, event.prev_event_ids() ) - logger.info( - "compute_event_context %s event=%s (event.prev_event_ids=%s) entry.state_group=%s entry.prev_group=%s entry.delta_ids=%s entry.state=%s", - event.type, - event.event_id, - event.prev_event_ids(), - entry.state_group, - entry.prev_group, - entry.delta_ids, - entry.state, - ) state_ids_before_event = entry.state state_group_before_event = entry.state_group @@ -410,9 +400,6 @@ async def resolve_state_groups_for_events( state_groups_ids = await self.state_store.get_state_groups_ids( room_id, event_ids ) - logger.info( - "resolve_state_groups_for_events state_groups_ids=%s", state_groups_ids - ) if len(state_groups_ids) == 0: return _StateCacheEntry(state={}, state_group=None) From dc34f0f5c00735da926546edd7b01ce0b9935c0e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 03:58:20 -0500 Subject: [PATCH 20/21] Label fake event ID's as fake --- synapse/rest/client/room_batch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 35c2ff9fffaf..5549dfca94d0 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -42,7 +42,7 @@ def generate_fake_prev_event_id() -> str: - return "$" + random_string(43) + return "$fake_" + random_string(43) class RoomBatchSendEventRestServlet(RestServlet): From 1d1830d5b66951ed4236bfa062861da1cdafdbf4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 21:38:56 -0500 Subject: [PATCH 21/21] Fix typo --- .../schema/main/delta/65/01msc2716_insertion_event_edges.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql index 9254fd9d2c15..98b25daf451b 100644 --- a/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql +++ b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql @@ -14,6 +14,6 @@ */ -- Recreate the insertion_event_edges event_id index without the unique constraint --- because an insetion event can have multiple edges. +-- because an insertion event can have multiple edges. DROP INDEX insertion_event_edges_event_id; CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id);