Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Resolve and share state_groups for all historical events in batch (MSC2716) #10975

Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
64448b3
Fix event context for outliers in important MSC2716 spot
MadLittleMods Sep 29, 2021
f3174cd
Add changelog
MadLittleMods Sep 29, 2021
6713a2a
Update changelog.d/10938.bugfix
MadLittleMods Sep 29, 2021
97fa9a2
Update synapse/handlers/message.py
MadLittleMods Sep 29, 2021
fa4f20d
Only generate context when we need to (it's not free to throw away)
MadLittleMods Sep 29, 2021
4fea37e
Add proper state and state_groups so historical events return state f…
MadLittleMods Sep 29, 2021
8fb4d6f
Force /context to return state for the given historical event
MadLittleMods Sep 30, 2021
96d9d11
Set full state for each historical event like others
MadLittleMods Sep 30, 2021
b20fd16
Fix boolean logic for testing whether msc2716 is enabled
MadLittleMods Sep 30, 2021
0362887
Merge branch 'develop' into madlittlemods/msc2716-resolve-state-for-a…
MadLittleMods Oct 1, 2021
cafb1dc
Remove debug logs
MadLittleMods Oct 1, 2021
487754f
Restore back to what was before
MadLittleMods Oct 1, 2021
43f1328
Resolve and share state_groups for all historical events in batch
MadLittleMods Oct 2, 2021
d494673
Add sql comment
MadLittleMods Oct 2, 2021
6005c46
Remove unused code and fix lint
MadLittleMods Oct 2, 2021
1227154
Add changelog
MadLittleMods Oct 2, 2021
10c91ee
Fix upsert many being weird with combining key and value columns whic…
MadLittleMods Oct 2, 2021
d0d6699
Add findings when testing with Element
MadLittleMods Oct 2, 2021
aa2e56e
Connect the state to the insertion event which is inherited by the re…
MadLittleMods Oct 5, 2021
3b085ab
Remove debug logging
MadLittleMods Oct 5, 2021
b975bd2
Merge branch 'develop' into madlittlemods/msc2716-resolve-state-for-a…
MadLittleMods Oct 5, 2021
c5ea94c
Merge branch 'develop' into madlittlemods/msc2716-resolve-state-for-a…
MadLittleMods Oct 8, 2021
dc34f0f
Label fake event ID's as fake
MadLittleMods Oct 8, 2021
77ffb69
Merge branch 'develop' into madlittlemods/msc2716-resolve-state-for-a…
MadLittleMods Oct 9, 2021
1d1830d
Fix typo
MadLittleMods Oct 9, 2021
14d6672
Merge branch 'develop' into madlittlemods/msc2716-resolve-state-for-a…
MadLittleMods Oct 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10975.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Resolve and share `state_groups` for all [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical events in batch.
57 changes: 34 additions & 23 deletions synapse/handlers/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,29 +607,6 @@ async def create_event(

builder.internal_metadata.historical = historical

# Strip down the auth_event_ids to only what we need to auth the event.
# For example, we don't need extra m.room.member that don't match event.sender
if auth_event_ids is not None:
# If auth events are provided, prev events must be also.
assert prev_event_ids is not None

temp_event = await builder.build(
prev_event_ids=prev_event_ids,
auth_event_ids=auth_event_ids,
depth=depth,
)
auth_events = await self.store.get_events_as_list(auth_event_ids)
# Create a StateMap[str]
auth_event_state_map = {
(e.type, e.state_key): e.event_id for e in auth_events
}
# Actually strip down and use the necessary auth events
auth_event_ids = self._event_auth_handler.compute_auth_events(
event=temp_event,
current_state_ids=auth_event_state_map,
for_verification=False,
)

event, context = await self.create_new_client_event(
builder=builder,
requester=requester,
Expand Down Expand Up @@ -936,6 +913,33 @@ async def create_new_client_event(
Tuple of created event, context
"""

# Strip down the auth_event_ids to only what we need to auth the event.
# For example, we don't need extra m.room.member that don't match event.sender
full_state_ids_at_event = None
if auth_event_ids is not None:
# If auth events are provided, prev events must be also.
assert prev_event_ids is not None

# Copy the full auth state before it stripped down
full_state_ids_at_event = auth_event_ids.copy()

temp_event = await builder.build(
prev_event_ids=prev_event_ids,
auth_event_ids=auth_event_ids,
depth=depth,
)
auth_events = await self.store.get_events_as_list(auth_event_ids)
# Create a StateMap[str]
auth_event_state_map = {
(e.type, e.state_key): e.event_id for e in auth_events
}
# Actually strip down and use the necessary auth events
auth_event_ids = self._event_auth_handler.compute_auth_events(
event=temp_event,
current_state_ids=auth_event_state_map,
for_verification=False,
)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is moved down here so we can get a copy of the full_state_ids_at_event to put in the insertion events below.


if prev_event_ids is not None:
assert (
len(prev_event_ids) <= 10
Expand Down Expand Up @@ -965,6 +969,13 @@ async def create_new_client_event(
if builder.internal_metadata.outlier:
event.internal_metadata.outlier = True
context = EventContext.for_outlier()
elif (
event.type == EventTypes.MSC2716_INSERTION
and full_state_ids_at_event
and builder.internal_metadata.is_historical()
):
old_state = await self.store.get_events_as_list(full_state_ids_at_event)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MadLittleMods, @erikjohnston: I'm trying to figure out how this works, so that I can fit it in with my work on faster joins and partial room state.

so... how does this work? full_state_ids_at_event is set to auth_event_ids, which is absolutely not the full state at the event. Rather, as the docstring says, it is "The event ids to use as the auth_events for the new event." - ie, the subset of the state that is needed to auth the event.

Copy link
Contributor Author

@MadLittleMods MadLittleMods Feb 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The relevant call stacks for this are:

# For events
RoomBatchSendEventRestServlet.on_POST
handle_batch_of_events
persist_historical_events
create_event
create_new_client_event

# For `m.room.member` state events
RoomBatchSendEventRestServlet.on_POST
persist_state_events_at_start
update_membership
update_membership_locked
_local_membership_update
create_event
create_new_client_event

# For non `m.room.member` state events
RoomBatchSendEventRestServlet.on_POST
persist_state_events_at_start
create_and_send_nonmember_event
create_event
create_new_client_event

In RoomBatchSendEventRestServlet.on_POST we fetch the full state for the event we're inserting next to (specified by /batch_send?prev_event_id=xxx) as a base to start from. Then as we persist the state_events_at_start, we also add that to the auth_event_ids list.

That full auth_event_ids list is then passed down that call stack and gets stripped down to only include the necessary bits in the code here. And we use full_state_ids_at_event to "Add explicit state to the insertion event so the rest of the batch can inherit the same state and state_group"


Here is the context where we originally started stripping down the state: #9247 (comment)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, thanks for the explanation, but this seems extremely confusing. At the very least, if create_new_client_event and co are expecting auth_event_ids to be the full state rather than just the auth events, then the docstrings should be updated to say so - right now they are flat-out wrong.

But generally, this feels like an abuse of auth_event_ids - we shouldn't be using it to mean "normally the auth events, except under special circumstances, when it is the full state". I'm not sure I have any immediate suggestions, but adding a new parameter would be better than the current situation.

@erikjohnston I think you've been following the MSC2716 work a bit more than me - any thoughts here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I think I got confused when reading this as at some point the state at the insertion event was limited to the used auth events, which is no longer the case.

I agree that we should therefore be passing the full state through as a separate parameter. Though at some point I'm wondering if this code path needs refactoring to make it easier to use in the /send_batch case, as its starting to get a bit unwieldy with all the different cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@richvdh Do you want me to tackle this new full state parameter? Don't want to collide and cause conflicts with your existing refactor.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're unlikely to conflict with me here, so please do go ahead!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated via #12083

context = await self.state.compute_event_context(event, old_state=old_state)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add explicit state to the insertion event so the rest of the batch can inherit the same state and state_group

else:
context = await self.state.compute_event_context(event)

Expand Down
40 changes: 30 additions & 10 deletions synapse/handlers/room_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
logger = logging.getLogger(__name__)


def generate_fake_event_id() -> str:
return "$fake_" + random_string(43)


class RoomBatchHandler:
def __init__(self, hs: "HomeServer"):
self.hs = hs
Expand Down Expand Up @@ -177,6 +181,11 @@ async def persist_state_events_at_start(

state_event_ids_at_start = []
auth_event_ids = initial_auth_event_ids.copy()

# Make the state events float off on their own so we don't have a
# bunch of `@mxid joined the room` noise between each batch
prev_event_id_for_state_chain = generate_fake_event_id()
Copy link
Contributor Author

@MadLittleMods MadLittleMods Oct 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still have the state events float off on their own, but now they're in a floating chain instead of individual. This way we can connect the historical events to the state chain via one prev_event.


for state_event in state_events_at_start:
assert_params_in_dict(
state_event, ["type", "origin_server_ts", "content", "sender"]
Expand All @@ -200,10 +209,6 @@ async def persist_state_events_at_start(
# Mark all events as historical
event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True

# Make the state events float off on their own so we don't have a
# bunch of `@mxid joined the room` noise between each batch
fake_prev_event_id = "$" + random_string(43)

# TODO: This is pretty much the same as some other code to handle inserting state in this file
if event_dict["type"] == EventTypes.Member:
membership = event_dict["content"].get("membership", None)
Expand All @@ -216,7 +221,7 @@ async def persist_state_events_at_start(
action=membership,
content=event_dict["content"],
outlier=True,
prev_event_ids=[fake_prev_event_id],
prev_event_ids=[prev_event_id_for_state_chain],
# Make sure to use a copy of this list because we modify it
# later in the loop here. Otherwise it will be the same
# reference and also update in the event when we append later.
Expand All @@ -235,7 +240,7 @@ async def persist_state_events_at_start(
),
event_dict,
outlier=True,
prev_event_ids=[fake_prev_event_id],
prev_event_ids=[prev_event_id_for_state_chain],
# Make sure to use a copy of this list because we modify it
# later in the loop here. Otherwise it will be the same
# reference and also update in the event when we append later.
Expand All @@ -245,6 +250,8 @@ async def persist_state_events_at_start(

state_event_ids_at_start.append(event_id)
auth_event_ids.append(event_id)
# Connect all the state in a floating chain
prev_event_id_for_state_chain = event_id

return state_event_ids_at_start

Expand Down Expand Up @@ -289,6 +296,10 @@ async def persist_historical_events(
for ev in events_to_create:
assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"])

assert self.hs.is_mine_id(ev["sender"]), "User must be our own: %s" % (
ev["sender"],
)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this up because it makes sense to assert this earlier rather than later.


event_dict = {
"type": ev["type"],
"origin_server_ts": ev["origin_server_ts"],
Expand All @@ -311,17 +322,26 @@ async def persist_historical_events(
historical=True,
depth=inherited_depth,
)

assert context._state_group

# Normally this is done when persisting the event but we have to
# pre-emptively do it here because we create all the events first,
# then persist them in another pass below. And we want to share
# state_groups across the whole batch so this lookup needs to work
# for the next event in the batch in this loop.
await self.store.store_state_group_id_for_event_id(
event_id=event.event_id,
state_group_id=context._state_group,
)

logger.debug(
"RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s",
event,
prev_event_ids,
auth_event_ids,
)

assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % (
event.sender,
)

events_to_persist.append((event, context))
event_id = event.event_id

Expand Down
15 changes: 6 additions & 9 deletions synapse/rest/client/room_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from synapse.http.site import SynapseRequest
from synapse.rest.client.transactions import HttpTransactionCache
from synapse.types import JsonDict
from synapse.util.stringutils import random_string

if TYPE_CHECKING:
from synapse.server import HomeServer
Expand Down Expand Up @@ -160,11 +159,6 @@ async def on_POST(
base_insertion_event = None
if batch_id_from_query:
batch_id_to_connect_to = batch_id_from_query
# All but the first base insertion event should point at a fake
# event, which causes the HS to ask for the state at the start of
# the batch later.
fake_prev_event_id = "$" + random_string(43)
prev_event_ids = [fake_prev_event_id]
# Otherwise, create an insertion event to act as a starting point.
#
# We don't always have an insertion event to start hanging more history
Expand All @@ -173,16 +167,14 @@ async def on_POST(
# an insertion event), in which case we just create a new insertion event
# that can then get pointed to by a "marker" event later.
else:
prev_event_ids = prev_event_ids_from_query
Copy link
Contributor Author

@MadLittleMods MadLittleMods Oct 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this was actually a bug. We only want the base insertion event to be tied to prev_event_ids_from_query.

Whereas previously, this attached the base and normal insertion event for the first batch (when no ?batch_id was specified)

Copy link
Contributor Author

@MadLittleMods MadLittleMods Oct 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(notice the extra prev_event for the insertion event in chunk0)

Before (expected) Before (actual, because of this bug)
Mermaid live editor Mermaid live editor


base_insertion_event_dict = (
self.room_batch_handler.create_insertion_event_dict(
sender=requester.user.to_string(),
room_id=room_id,
origin_server_ts=last_event_in_batch["origin_server_ts"],
)
)
base_insertion_event_dict["prev_events"] = prev_event_ids.copy()
base_insertion_event_dict["prev_events"] = prev_event_ids_from_query.copy()

(
base_insertion_event,
Expand All @@ -203,6 +195,11 @@ async def on_POST(
EventContentFields.MSC2716_NEXT_BATCH_ID
]

# Also connect the historical event chain to the end of the floating
# state chain, which causes the HS to ask for the state at the start of
# the batch later.
prev_event_ids = [state_event_ids_at_start[-1]]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Connect our historical batch chain to the floating state chain so we still get the floating benefit but also more semantic


# Create and persist all of the historical events as well as insertion
# and batch meta events to make the batch navigable in the DAG.
event_ids, next_batch_id = await self.room_batch_handler.handle_batch_of_events(
Expand Down
10 changes: 6 additions & 4 deletions synapse/storage/databases/main/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -2069,12 +2069,14 @@ def _store_event_state_mappings_txn(

state_groups[event.event_id] = context.state_group

self.db_pool.simple_insert_many_txn(
self.db_pool.simple_upsert_many_txn(
Copy link
Contributor Author

@MadLittleMods MadLittleMods Oct 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed this to an upsert to avoid colliding with a previous entry we put in via store_state_group_id_for_event_id earlier before we persisted the events.

txn,
table="event_to_state_groups",
values=[
{"state_group": state_group_id, "event_id": event_id}
for event_id, state_group_id in state_groups.items()
key_names=["event_id"],
key_values=[[event_id] for event_id, _ in state_groups.items()],
value_names=["state_group"],
value_values=[
[state_group_id] for _, state_group_id in state_groups.items()
],
)

Expand Down
13 changes: 13 additions & 0 deletions synapse/storage/databases/main/room_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,16 @@ async def get_insertion_event_by_batch_id(
retcol="event_id",
allow_none=True,
)

async def store_state_group_id_for_event_id(
self, event_id: str, state_group_id: int
) -> Optional[str]:
{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what's going on here. The braces to enclose the await is legit syntax (it'll build a set, like {"hello"}). As written the function is going to return None.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #11310

await self.db_pool.simple_upsert(
table="event_to_state_groups",
keyvalues={"event_id": event_id},
values={"state_group": state_group_id, "event_id": event_id},
# Unique constraint on event_id so we don't have to lock
lock=False,
)
}
6 changes: 5 additions & 1 deletion synapse/storage/schema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

SCHEMA_VERSION = 64 # remember to update the list below when updating
SCHEMA_VERSION = 65 # remember to update the list below when updating
"""Represents the expectations made by the codebase about the database schema

This should be incremented whenever the codebase changes its requirements on the
Expand Down Expand Up @@ -41,6 +41,10 @@

Changes in SCHEMA_VERSION = 64:
- MSC2716: Rename related tables and columns from "chunks" to "batches".

Changes in SCHEMA_VERSION = 65:
- MSC2716: Remove unique event_id constraint from insertion_event_edges
because an insertion event can have multiple edges.
"""


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/* Copyright 2021 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

-- Recreate the insertion_event_edges event_id index without the unique constraint
-- because an insetion event can have multiple edges.
DROP INDEX insertion_event_edges_event_id;
CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id);