From c38abfdf6df4e25638a89ef5aea8d5929072bf83 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Thu, 10 Nov 2022 11:46:19 +0100 Subject: [PATCH 1/7] Filter out non local events when a room doesn't have its full state Signed-off-by: Mathieu Velten --- changelog.d/14404.misc | 1 + .../federation/sender/per_destination_queue.py | 1 + synapse/handlers/federation.py | 7 ++++--- synapse/visibility.py | 18 +++++++++++++++--- tests/test_visibility.py | 10 +++++----- 5 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 changelog.d/14404.misc diff --git a/changelog.d/14404.misc b/changelog.d/14404.misc new file mode 100644 index 000000000000..b9ab525f2b33 --- /dev/null +++ b/changelog.d/14404.misc @@ -0,0 +1 @@ +Faster joins: filter out non local events when a room doesn't have its full state. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 084c45a95ca1..3ae5e8634c34 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -505,6 +505,7 @@ async def _catch_up_transmission_loop(self) -> None: new_pdus = await filter_events_for_server( self._storage_controllers, self._destination, + self._server_name, new_pdus, redact=False, ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5fc3b8bc8c3d..3d9c6e1b0209 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -379,6 +379,7 @@ async def _maybe_backfill_inner( filtered_extremities = await filter_events_for_server( self._storage_controllers, self.server_name, + self.server_name, events_to_check, redact=False, check_history_visibility_only=True, @@ -1252,7 +1253,7 @@ async def on_backfill_request( ) events = await filter_events_for_server( - self._storage_controllers, origin, events + self._storage_controllers, origin, self.server_name, events ) return events @@ -1283,7 +1284,7 @@ async def get_persisted_pdu( await self._event_auth_handler.assert_host_in_room(event.room_id, origin) events = await filter_events_for_server( - self._storage_controllers, origin, [event] + self._storage_controllers, origin, self.server_name, [event] ) event = events[0] return event @@ -1309,7 +1310,7 @@ async def on_get_missing_events( ) missing_events = await filter_events_for_server( - self._storage_controllers, origin, missing_events + self._storage_controllers, origin, self.server_name, missing_events ) return missing_events diff --git a/synapse/visibility.py b/synapse/visibility.py index 40a9c5b53f83..8b3d824c35ab 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -563,7 +563,8 @@ def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: async def filter_events_for_server( storage: StorageControllers, - server_name: str, + target_server_name: str, + local_server_name: str, events: List[EventBase], redact: bool = True, check_history_visibility_only: bool = False, @@ -603,7 +604,7 @@ def check_event_is_visible( # if the server is either in the room or has been invited # into the room. for ev in memberships.values(): - assert get_domain_from_id(ev.state_key) == server_name + assert get_domain_from_id(ev.state_key) == target_server_name memtype = ev.membership if memtype == Membership.JOIN: @@ -636,7 +637,7 @@ def check_event_is_visible( if event_to_history_vis[e.event_id] not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE) ], - server_name, + target_server_name, ) to_return = [] @@ -645,6 +646,17 @@ def check_event_is_visible( visible = check_event_is_visible( event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {}) ) + + # Filter out non-local events when we are in the middle of a partial join, + # since our servers list can be out of date and we could leak events + # to servers not in the room anymore. + # This can also be true for local events but we consider it to be + # an acceptable risk in this case. + if e.origin != local_server_name and await storage.main.is_partial_state_room( + e.room_id + ): + visible = False + if visible and not erased: to_return.append(e) elif redact: diff --git a/tests/test_visibility.py b/tests/test_visibility.py index c385b2f8d479..d0b9ad54540d 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -61,7 +61,7 @@ def test_filtering(self) -> None: filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "test_server", events_to_filter + self._storage_controllers, "test_server", "hs", events_to_filter ) ) @@ -83,7 +83,7 @@ def test_filter_outlier(self) -> None: self.assertEqual( self.get_success( filter_events_for_server( - self._storage_controllers, "remote_hs", [outlier] + self._storage_controllers, "remote_hs", "hs", [outlier] ) ), [outlier], @@ -94,7 +94,7 @@ def test_filter_outlier(self) -> None: filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "remote_hs", [outlier, evt] + self._storage_controllers, "remote_hs", "local_hs", [outlier, evt] ) ) self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}") @@ -106,7 +106,7 @@ def test_filter_outlier(self) -> None: # be redacted) filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "other_server", [outlier, evt] + self._storage_controllers, "other_server", "local_hs", [outlier, evt] ) ) self.assertEqual(filtered[0], outlier) @@ -141,7 +141,7 @@ def test_erased_user(self) -> None: # ... and the filtering happens. filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "test_server", events_to_filter + self._storage_controllers, "test_server", "local_hs", events_to_filter ) ) From 7fb7757fd9daa666a6ad2a893834f77eceda1a77 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Mon, 14 Nov 2022 18:05:08 +0100 Subject: [PATCH 2/7] Fix race --- synapse/visibility.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 8b3d824c35ab..33b2b15e4aad 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -623,6 +623,21 @@ def check_event_is_visible( # to no users having been erased. erased_senders = {} + # Filter out non-local events when we are in the middle of a partial join, since our servers + # list can be out of date and we could leak events to servers not in the room anymore. + # This can also be true for local events but we consider it to be an acceptable risk. + + # We do this check as a first step and before retrieving membership events because + # otherwise a room could be fully joined after we retrieve those, which would then bypass + # this check but would base the filtering on an outdated view of the membership events. + + partial_state_invisible_events = set() + for e in events: + if e.origin != local_server_name and await storage.main.is_partial_state_room( + e.room_id + ): + partial_join_invisible_events.add(e) + # Let's check to see if all the events have a history visibility # of "shared" or "world_readable". If that's the case then we don't # need to check membership (as we know the server is in the room). @@ -647,14 +662,7 @@ def check_event_is_visible( event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {}) ) - # Filter out non-local events when we are in the middle of a partial join, - # since our servers list can be out of date and we could leak events - # to servers not in the room anymore. - # This can also be true for local events but we consider it to be - # an acceptable risk in this case. - if e.origin != local_server_name and await storage.main.is_partial_state_room( - e.room_id - ): + if e in partial_state_invisible_events: visible = False if visible and not erased: From ed3d7e38726d64c026c62abf34b2c4c2a220b0f7 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Mon, 14 Nov 2022 18:32:48 +0100 Subject: [PATCH 3/7] Allow partial joins in assert_host_in_room for missing_events and backfill --- synapse/handlers/federation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3d9c6e1b0209..ce17f47d49b7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1232,7 +1232,9 @@ async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: async def on_backfill_request( self, origin: str, room_id: str, pdu_list: List[str], limit: int ) -> List[EventBase]: - await self._event_auth_handler.assert_host_in_room(room_id, origin) + # We allow partially joined rooms since in this case we are filtering out + # non-local events in `filter_events_for_server`. + await self._event_auth_handler.assert_host_in_room(room_id, origin, True) # Synapse asks for 100 events per backfill request. Do not allow more. limit = min(limit, 100) @@ -1297,7 +1299,9 @@ async def on_get_missing_events( latest_events: List[str], limit: int, ) -> List[EventBase]: - await self._event_auth_handler.assert_host_in_room(room_id, origin) + # We allow partially joined rooms since in this case we are filtering out + # non-local events in `filter_events_for_server`. + await self._event_auth_handler.assert_host_in_room(room_id, origin, True) # Only allow up to 20 events to be retrieved per request. limit = min(limit, 20) From 5a1893fd317b16bc7b05aa570a562f2ec3b1ecf7 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Mon, 14 Nov 2022 18:39:46 +0100 Subject: [PATCH 4/7] Typo --- synapse/visibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 33b2b15e4aad..c3f98a3fe836 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -636,7 +636,7 @@ def check_event_is_visible( if e.origin != local_server_name and await storage.main.is_partial_state_room( e.room_id ): - partial_join_invisible_events.add(e) + partial_state_invisible_events.add(e) # Let's check to see if all the events have a history visibility # of "shared" or "world_readable". If that's the case then we don't From 3f5cf6f31d945093ef4f3460f250aa3ad5a633e8 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Thu, 17 Nov 2022 18:44:26 +0100 Subject: [PATCH 5/7] Do not filter out non local events when we want to backfill --- synapse/handlers/federation.py | 1 + synapse/visibility.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ce17f47d49b7..b2c991bfb289 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -383,6 +383,7 @@ async def _maybe_backfill_inner( events_to_check, redact=False, check_history_visibility_only=True, + filter_partial_state=False, ) if filtered_extremities: extremities_to_request.append(bp.event_id) diff --git a/synapse/visibility.py b/synapse/visibility.py index c3f98a3fe836..9e998bc4e941 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -568,6 +568,7 @@ async def filter_events_for_server( events: List[EventBase], redact: bool = True, check_history_visibility_only: bool = False, + filter_partial_state: bool = True, ) -> List[EventBase]: """Filter a list of events based on whether given server is allowed to see them. @@ -633,8 +634,10 @@ def check_event_is_visible( partial_state_invisible_events = set() for e in events: - if e.origin != local_server_name and await storage.main.is_partial_state_room( - e.room_id + if ( + filter_partial_state + and e.origin != local_server_name + and await storage.main.is_partial_state_room(e.room_id) ): partial_state_invisible_events.add(e) From bd7ba64cb1844e7b3b954640b06e56088a9037dd Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Fri, 18 Nov 2022 13:13:01 +0100 Subject: [PATCH 6/7] Reuse check_history_visibility_only instead --- synapse/handlers/federation.py | 1 - synapse/visibility.py | 15 +++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b2c991bfb289..ce17f47d49b7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -383,7 +383,6 @@ async def _maybe_backfill_inner( events_to_check, redact=False, check_history_visibility_only=True, - filter_partial_state=False, ) if filtered_extremities: extremities_to_request.append(bp.event_id) diff --git a/synapse/visibility.py b/synapse/visibility.py index 9e998bc4e941..010e021fe8cf 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -568,7 +568,6 @@ async def filter_events_for_server( events: List[EventBase], redact: bool = True, check_history_visibility_only: bool = False, - filter_partial_state: bool = True, ) -> List[EventBase]: """Filter a list of events based on whether given server is allowed to see them. @@ -633,13 +632,13 @@ def check_event_is_visible( # this check but would base the filtering on an outdated view of the membership events. partial_state_invisible_events = set() - for e in events: - if ( - filter_partial_state - and e.origin != local_server_name - and await storage.main.is_partial_state_room(e.room_id) - ): - partial_state_invisible_events.add(e) + if not check_history_visibility_only: + for e in events: + if ( + e.origin != local_server_name + and await storage.main.is_partial_state_room(e.room_id) + ): + partial_state_invisible_events.add(e) # Let's check to see if all the events have a history visibility # of "shared" or "world_readable". If that's the case then we don't From 6f4a0b62663228ea5e055286b6ccb9fae5609b17 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Fri, 18 Nov 2022 15:37:18 +0100 Subject: [PATCH 7/7] use get_domain_from_id(e.sender) instead of e.origin --- synapse/visibility.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 010e021fe8cf..b443857571b7 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -634,8 +634,9 @@ def check_event_is_visible( partial_state_invisible_events = set() if not check_history_visibility_only: for e in events: + sender_domain = get_domain_from_id(e.sender) if ( - e.origin != local_server_name + sender_domain != local_server_name and await storage.main.is_partial_state_room(e.room_id) ): partial_state_invisible_events.add(e)