Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Add ability to shard the federation sender (#7798)
Browse files Browse the repository at this point in the history
  • Loading branch information
erikjohnston committed Jul 10, 2020
1 parent f1245dc commit f299441
Show file tree
Hide file tree
Showing 15 changed files with 670 additions and 157 deletions.
1 change: 1 addition & 0 deletions changelog.d/7798.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add experimental support for running multiple federation sender processes.
65 changes: 33 additions & 32 deletions docs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,38 +118,6 @@ pid_file: DATADIR/homeserver.pid
#
#enable_search: false

# Restrict federation to the following whitelist of domains.
# N.B. we recommend also firewalling your federation listener to limit
# inbound federation traffic as early as possible, rather than relying
# purely on this application-layer restriction. If not specified, the
# default is to whitelist everything.
#
#federation_domain_whitelist:
# - lon.example.com
# - nyc.example.com
# - syd.example.com

# Prevent federation requests from being sent to the following
# blacklist IP address CIDR ranges. If this option is not specified, or
# specified with an empty list, no ip range blacklist will be enforced.
#
# As of Synapse v1.4.0 this option also affects any outbound requests to identity
# servers provided by user input.
#
# (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
# listed here, since they correspond to unroutable addresses.)
#
federation_ip_range_blacklist:
- '127.0.0.0/8'
- '10.0.0.0/8'
- '172.16.0.0/12'
- '192.168.0.0/16'
- '100.64.0.0/10'
- '169.254.0.0/16'
- '::1/128'
- 'fe80::/64'
- 'fc00::/7'

# List of ports that Synapse should listen on, their purpose and their
# configuration.
#
Expand Down Expand Up @@ -608,6 +576,39 @@ acme:



# Restrict federation to the following whitelist of domains.
# N.B. we recommend also firewalling your federation listener to limit
# inbound federation traffic as early as possible, rather than relying
# purely on this application-layer restriction. If not specified, the
# default is to whitelist everything.
#
#federation_domain_whitelist:
# - lon.example.com
# - nyc.example.com
# - syd.example.com

# Prevent federation requests from being sent to the following
# blacklist IP address CIDR ranges. If this option is not specified, or
# specified with an empty list, no ip range blacklist will be enforced.
#
# As of Synapse v1.4.0 this option also affects any outbound requests to identity
# servers provided by user input.
#
# (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
# listed here, since they correspond to unroutable addresses.)
#
federation_ip_range_blacklist:
- '127.0.0.0/8'
- '10.0.0.0/8'
- '172.16.0.0/12'
- '192.168.0.0/16'
- '100.64.0.0/10'
- '169.254.0.0/16'
- '::1/128'
- 'fe80::/64'
- 'fc00::/7'


## Caching ##

# Caching can be configured through the following options.
Expand Down
59 changes: 16 additions & 43 deletions synapse/app/generic_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,25 +511,7 @@ class GenericWorkerSlavedStore(
SearchWorkerStore,
BaseSlavedStore,
):
def __init__(self, database, db_conn, hs):
super(GenericWorkerSlavedStore, self).__init__(database, db_conn, hs)

# We pull out the current federation stream position now so that we
# always have a known value for the federation position in memory so
# that we don't have to bounce via a deferred once when we start the
# replication streams.
self.federation_out_pos_startup = self._get_federation_out_pos(db_conn)

def _get_federation_out_pos(self, db_conn):
sql = "SELECT stream_id FROM federation_stream_position WHERE type = ?"
sql = self.database_engine.convert_param_style(sql)

txn = db_conn.cursor()
txn.execute(sql, ("federation",))
rows = txn.fetchall()
txn.close()

return rows[0][0] if rows else -1
pass


class GenericWorkerServer(HomeServer):
Expand Down Expand Up @@ -812,19 +794,11 @@ def __init__(self, hs: GenericWorkerServer):
self.federation_sender = hs.get_federation_sender()
self._hs = hs

# if the worker is restarted, we want to pick up where we left off in
# the replication stream, so load the position from the database.
#
# XXX is this actually worthwhile? Whenever the master is restarted, we'll
# drop some rows anyway (which is mostly fine because we're only dropping
# typing and presence notifications). If the replication stream is
# unreliable, why do we do all this hoop-jumping to store the position in the
# database? See also https://github.com/matrix-org/synapse/issues/7535.
#
self.federation_position = self.store.federation_out_pos_startup
# Stores the latest position in the federation stream we've gotten up
# to. This is always set before we use it.
self.federation_position = None

self._fed_position_linearizer = Linearizer(name="_fed_position_linearizer")
self._last_ack = self.federation_position

def on_start(self):
# There may be some events that are persisted but haven't been sent,
Expand Down Expand Up @@ -932,7 +906,6 @@ async def _save_and_send_ack(self):
# We ACK this token over replication so that the master can drop
# its in memory queues
self._hs.get_tcp_replication().send_federation_ack(current_position)
self._last_ack = current_position
except Exception:
logger.exception("Error updating federation stream position")

Expand Down Expand Up @@ -960,7 +933,7 @@ def start(config_options):
)

if config.worker_app == "synapse.app.appservice":
if config.notify_appservices:
if config.appservice.notify_appservices:
sys.stderr.write(
"\nThe appservices must be disabled in the main synapse process"
"\nbefore they can be run in a separate worker."
Expand All @@ -970,13 +943,13 @@ def start(config_options):
sys.exit(1)

# Force the appservice to start since they will be disabled in the main config
config.notify_appservices = True
config.appservice.notify_appservices = True
else:
# For other worker types we force this to off.
config.notify_appservices = False
config.appservice.notify_appservices = False

if config.worker_app == "synapse.app.pusher":
if config.start_pushers:
if config.server.start_pushers:
sys.stderr.write(
"\nThe pushers must be disabled in the main synapse process"
"\nbefore they can be run in a separate worker."
Expand All @@ -986,13 +959,13 @@ def start(config_options):
sys.exit(1)

# Force the pushers to start since they will be disabled in the main config
config.start_pushers = True
config.server.start_pushers = True
else:
# For other worker types we force this to off.
config.start_pushers = False
config.server.start_pushers = False

if config.worker_app == "synapse.app.user_dir":
if config.update_user_directory:
if config.server.update_user_directory:
sys.stderr.write(
"\nThe update_user_directory must be disabled in the main synapse process"
"\nbefore they can be run in a separate worker."
Expand All @@ -1002,13 +975,13 @@ def start(config_options):
sys.exit(1)

# Force the user directory updater to start since it will be disabled in the main config
config.update_user_directory = True
config.server.update_user_directory = True
else:
# For other worker types we force this to off.
config.update_user_directory = False
config.server.update_user_directory = False

if config.worker_app == "synapse.app.federation_sender":
if config.send_federation:
if config.federation.send_federation:
sys.stderr.write(
"\nThe send_federation must be disabled in the main synapse process"
"\nbefore they can be run in a separate worker."
Expand All @@ -1018,10 +991,10 @@ def start(config_options):
sys.exit(1)

# Force the federation sender to start since it will be disabled in the main config
config.send_federation = True
config.federation.send_federation = True
else:
# For other worker types we force this to off.
config.send_federation = False
config.federation.send_federation = False

synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts

Expand Down
129 changes: 129 additions & 0 deletions synapse/config/federation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
# Copyright 2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from hashlib import sha256
from typing import List, Optional

import attr
from netaddr import IPSet

from ._base import Config, ConfigError


@attr.s
class ShardedFederationSendingConfig:
    """Selects which federation sender instance owns a given destination host.

    Ownership is decided by hashing the destination name and mapping the
    result onto the list of configured instances.
    """

    # Names of the federation sender instances to shard across.
    instances = attr.ib(type=List[str])

    def should_send_to(self, instance_name: str, destination: str) -> bool:
        """Report whether `instance_name` should send transactions to `destination`.

        Args:
            instance_name: Name of the instance asking the question.
            destination: Host name of the remote server.

        Returns:
            True when zero or one instances are configured (regardless of
            `instance_name`); otherwise True only if the destination hashes
            to the slot of `instances` that holds `instance_name`.
        """
        configured = self.instances

        # Without sharding (no list, or a single entry) every caller is told
        # to send.
        if not configured or len(configured) == 1:
            return True

        # Read the SHA-256 digest of the destination as a little-endian
        # integer and reduce it modulo the number of instances to pick a slot.
        # The modulo is not perfectly uniform, but the digest is so large that
        # the resulting bias is negligible.
        digest = sha256(destination.encode("utf8")).digest()
        slot = int.from_bytes(digest, byteorder="little") % len(configured)
        return configured[slot] == instance_name


class FederationConfig(Config):
    """Federation-related homeserver options.

    Reads the outbound-sending toggle, the list of federation sender
    instances used for sharding, the domain whitelist and the IP range
    blacklist.
    """

    section = "federation"

    def read_config(self, config, **kwargs):
        """Populate the federation settings from the parsed config dict.

        Args:
            config: The parsed configuration mapping.
            **kwargs: Accepted and ignored by this section.

        Raises:
            ConfigError: If `federation_ip_range_blacklist` cannot be turned
                into an `IPSet`.
        """
        # Whether to send federation traffic out in this process. This only
        # applies to some federation traffic, and so shouldn't be used to
        # "disable" federation
        self.send_federation = config.get("send_federation", True)

        # A missing or empty `federation_sender_instances` becomes an empty list.
        federation_sender_instances = config.get("federation_sender_instances") or []
        self.federation_shard_config = ShardedFederationSendingConfig(
            federation_sender_instances
        )

        # FIXME: federation_domain_whitelist needs sytests
        self.federation_domain_whitelist = None  # type: Optional[dict]
        federation_domain_whitelist = config.get("federation_domain_whitelist", None)

        if federation_domain_whitelist is not None:
            # Turn the whitelist into a dict for speed of lookup. An explicitly
            # empty whitelist yields an empty dict, which stays distinct from
            # the "not configured" value of None.
            self.federation_domain_whitelist = {
                domain: True for domain in federation_domain_whitelist
            }

        self.federation_ip_range_blacklist = config.get(
            "federation_ip_range_blacklist", []
        )

        # Attempt to create an IPSet from the given ranges
        try:
            self.federation_ip_range_blacklist = IPSet(
                self.federation_ip_range_blacklist
            )

            # Always blacklist 0.0.0.0, ::
            self.federation_ip_range_blacklist.update(["0.0.0.0", "::"])
        except Exception as e:
            # Chain explicitly so the underlying parsing error stays attached
            # as the cause of the configuration error.
            raise ConfigError(
                "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e
            ) from e

    def generate_config_section(self, config_dir_path, server_name, **kwargs):
        """Return the sample YAML text documenting the federation options.

        The whitelist and blacklist options are separated by a blank line so
        that the commented-out whitelist example does not run into the
        blacklist commentary.
        """
        return """\
# Restrict federation to the following whitelist of domains.
# N.B. we recommend also firewalling your federation listener to limit
# inbound federation traffic as early as possible, rather than relying
# purely on this application-layer restriction. If not specified, the
# default is to whitelist everything.
#
#federation_domain_whitelist:
# - lon.example.com
# - nyc.example.com
# - syd.example.com

# Prevent federation requests from being sent to the following
# blacklist IP address CIDR ranges. If this option is not specified, or
# specified with an empty list, no ip range blacklist will be enforced.
#
# As of Synapse v1.4.0 this option also affects any outbound requests to identity
# servers provided by user input.
#
# (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
# listed here, since they correspond to unroutable addresses.)
#
federation_ip_range_blacklist:
- '127.0.0.0/8'
- '10.0.0.0/8'
- '172.16.0.0/12'
- '192.168.0.0/16'
- '100.64.0.0/10'
- '169.254.0.0/16'
- '::1/128'
- 'fe80::/64'
- 'fc00::/7'
"""
3 changes: 3 additions & 0 deletions synapse/config/homeserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .consent_config import ConsentConfig
from .database import DatabaseConfig
from .emailconfig import EmailConfig
from .federation import FederationConfig
from .groups import GroupsConfig
from .jwt_config import JWTConfig
from .key import KeyConfig
Expand Down Expand Up @@ -57,6 +58,7 @@ class HomeServerConfig(RootConfig):
config_classes = [
ServerConfig,
TlsConfig,
FederationConfig,
CacheConfig,
DatabaseConfig,
LoggingConfig,
Expand Down Expand Up @@ -90,4 +92,5 @@ class HomeServerConfig(RootConfig):
ThirdPartyRulesConfig,
TracerConfig,
RedisConfig,
FederationConfig,
]
Loading

0 comments on commit f299441

Please sign in to comment.