diff --git a/.circleci/main.yml b/.circleci/main.yml index 67f3e711b19..86684a5d0cc 100644 --- a/.circleci/main.yml +++ b/.circleci/main.yml @@ -32,7 +32,6 @@ default_environment: &default_environment CIRCLE_TEST_REPORTS: /tmp/circleci-test-results CIRCLE_ARTIFACTS: /tmp/circleci-artifacts GIT_PAGER: cat - IPFS_CHECK_RCMGR_DEFAULTS: 1 executors: golang: diff --git a/config/swarm.go b/config/swarm.go index 01181f36ee5..63119282772 100644 --- a/config/swarm.go +++ b/config/swarm.go @@ -143,6 +143,10 @@ type ResourceMgr struct { // Enables the Network Resource Manager feature, default to on. Enabled Flag `json:",omitempty"` Limits *rcmgr.LimitConfig `json:",omitempty"` + + MaxMemory OptionalString `json:",omitempty"` + MaxFileDescriptors OptionalInteger `json:",omitempty"` + // A list of multiaddrs that can bypass normal system limits (but are still // limited by the allowlist scope). Convenience config around // https://pkg.go.dev/github.com/libp2p/go-libp2p/p2p/host/resource-manager#Allowlist.Add diff --git a/core/commands/swarm.go b/core/commands/swarm.go index 6897e5eb602..1508efcb8a8 100644 --- a/core/commands/swarm.go +++ b/core/commands/swarm.go @@ -63,11 +63,12 @@ ipfs peers in the internet. } const ( - swarmVerboseOptionName = "verbose" - swarmStreamsOptionName = "streams" - swarmLatencyOptionName = "latency" - swarmDirectionOptionName = "direction" - swarmResetLimitsOptionName = "reset" + swarmVerboseOptionName = "verbose" + swarmStreamsOptionName = "streams" + swarmLatencyOptionName = "latency" + swarmDirectionOptionName = "direction" + swarmResetLimitsOptionName = "reset" + swarmUsedResourcesPercentageName = "min-used-limit-perc" ) type peeringResult struct { @@ -340,6 +341,9 @@ The output of this command is JSON. Arguments: []cmds.Argument{ cmds.StringArg("scope", true, false, "scope of the stat report"), }, + Options: []cmds.Option{ + cmds.IntOption(swarmUsedResourcesPercentageName, "Display only resources that are using above the specified percentage"), + }, Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error { node, err := cmdenv.GetNode(env) if err != nil { @@ -353,8 +357,10 @@ The output of this command is JSON. if len(req.Arguments) != 1 { return fmt.Errorf("must specify exactly one scope") } + + percentage, _ := req.Options[swarmUsedResourcesPercentageName].(int) scope := req.Arguments[0] - result, err := libp2p.NetStat(node.ResourceManager, scope) + result, err := libp2p.NetStat(node.ResourceManager, scope, percentage) if err != nil { return err } @@ -378,6 +384,7 @@ var swarmLimitCmd = &cmds.Command{ Tagline: "Get or set resource limits for a scope.", LongDescription: `Get or set resource limits for a scope. The scope can be one of the following: +- all -- all limits actually being applied. - system -- limits for the system aggregate resource usage. - transient -- limits for the transient resource usage. - svc: -- limits for the resource usage of a specific service. 
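The two new `Swarm.ResourceMgr` fields added to `config/swarm.go` above are optional types, so downstream code resolves them with `WithDefault`. Below is a minimal sketch of how they might be turned into concrete values, mirroring the defaults used later in this diff (1/8 of total system memory, 1/2 of the process FD limit). Only the config fields, `fd.GetNumFDs`, and the `humanize`/`memory` packages come from the change itself; the standalone `resolveResourceMgrDefaults` helper and the `main` wrapper are illustrative.

```go
package main

import (
	"fmt"

	"github.com/dustin/go-humanize"
	"github.com/ipfs/kubo/config"
	"github.com/ipfs/kubo/core/node/libp2p/fd"
	"github.com/pbnjay/memory"
)

// resolveResourceMgrDefaults shows how the new optional fields fall back to
// system-derived defaults: 1/8 of total memory and 1/2 of the FD limit.
func resolveResourceMgrDefaults(cfg config.SwarmConfig) (maxMemory uint64, maxFDs int64, err error) {
	maxMemoryDefault := humanize.Bytes(memory.TotalMemory() / 8)
	maxMemoryString := cfg.ResourceMgr.MaxMemory.WithDefault(maxMemoryDefault)
	maxMemory, err = humanize.ParseBytes(maxMemoryString)
	if err != nil {
		return 0, 0, err
	}
	maxFDs = cfg.ResourceMgr.MaxFileDescriptors.WithDefault(int64(fd.GetNumFDs()) / 2)
	return maxMemory, maxFDs, nil
}

func main() {
	var cfg config.SwarmConfig // zero value: both fields unset, so the defaults apply
	mem, fds, err := resolveResourceMgrDefaults(cfg)
	if err != nil {
		panic(err)
	}
	fmt.Printf("libp2p may use up to %s of memory and %d file descriptors\n", humanize.Bytes(mem), fds)
}
```

Users who want full control can still bypass these knobs entirely via `Swarm.ResourceMgr.Limits`, as described in the doc comment on `createDefaultLimitConfig` further down.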
@@ -435,19 +442,19 @@ Changes made via command line are persisted in the Swarm.ResourceMgr.Limits fiel } } - var result rcmgr.BaseLimit + var result interface{} _, reset := req.Options[swarmResetLimitsOptionName] if reset { result, err = libp2p.NetResetLimit(node.ResourceManager, node.Repo, scope) - if err != nil { - return err - } + } else if scope == "all" { + result, err = libp2p.NetLimitAll(node.ResourceManager) } else { // get scope limit result, err = libp2p.NetLimit(node.ResourceManager, scope) - if err != nil { - return err - } + } + + if err != nil { + return err } b := new(bytes.Buffer) diff --git a/core/node/libp2p/fd/sys_not_unix.go b/core/node/libp2p/fd/sys_not_unix.go new file mode 100644 index 00000000000..c857987480d --- /dev/null +++ b/core/node/libp2p/fd/sys_not_unix.go @@ -0,0 +1,7 @@ +//go:build !linux && !darwin && !windows + +package fd + +func GetNumFDs() int { + return 0 +} diff --git a/core/node/libp2p/fd/sys_unix.go b/core/node/libp2p/fd/sys_unix.go new file mode 100644 index 00000000000..5e417c0fa6d --- /dev/null +++ b/core/node/libp2p/fd/sys_unix.go @@ -0,0 +1,16 @@ +//go:build linux || darwin +// +build linux darwin + +package fd + +import ( + "golang.org/x/sys/unix" +) + +func GetNumFDs() int { + var l unix.Rlimit + if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &l); err != nil { + return 0 + } + return int(l.Cur) +} diff --git a/core/node/libp2p/fd/sys_windows.go b/core/node/libp2p/fd/sys_windows.go new file mode 100644 index 00000000000..eec17f3883f --- /dev/null +++ b/core/node/libp2p/fd/sys_windows.go @@ -0,0 +1,11 @@ +//go:build windows + +package fd + +import ( + "math" +) + +func GetNumFDs() int { + return math.MaxInt +} diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index c9a04338aea..49c9d382399 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -9,10 +9,6 @@ import ( "github.com/benbjohnson/clock" logging "github.com/ipfs/go-log/v2" - config "github.com/ipfs/kubo/config" - "github.com/ipfs/kubo/core/node/helpers" - "github.com/ipfs/kubo/repo" - "github.com/libp2p/go-libp2p" "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" @@ -21,8 +17,11 @@ import ( rcmgrObs "github.com/libp2p/go-libp2p/p2p/host/resource-manager/obs" "github.com/multiformats/go-multiaddr" "go.opencensus.io/stats/view" - "go.uber.org/fx" + + config "github.com/ipfs/kubo/config" + "github.com/ipfs/kubo/core/node/helpers" + "github.com/ipfs/kubo/repo" ) const NetLimitDefaultFilename = "limit.json" @@ -35,7 +34,7 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { var manager network.ResourceManager var opts Libp2pOpts - enabled := cfg.ResourceMgr.Enabled.WithDefault(false) + enabled := cfg.ResourceMgr.Enabled.WithDefault(true) // ENV overrides Config (if present) switch os.Getenv("LIBP2P_RCMGR") { @@ -53,7 +52,10 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { return nil, opts, fmt.Errorf("opening IPFS_PATH: %w", err) } - limits := adjustedDefaultLimits(cfg) + limits, err := createDefaultLimitConfig(cfg) + if err != nil { + return nil, opts, err + } if cfg.ResourceMgr.Limits != nil { l := *cfg.ResourceMgr.Limits @@ -122,14 +124,14 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { } type NetStatOut struct { - System *network.ScopeStat `json:",omitempty"` - Transient *network.ScopeStat `json:",omitempty"` - Services map[string]network.ScopeStat `json:",omitempty"` - Protocols map[string]network.ScopeStat `json:",omitempty"` - Peers map[string]network.ScopeStat `json:",omitempty"` 
+ System *rcmgr.BaseLimit `json:",omitempty"` + Transient *rcmgr.BaseLimit `json:",omitempty"` + Services map[string]rcmgr.BaseLimit `json:",omitempty"` + Protocols map[string]rcmgr.BaseLimit `json:",omitempty"` + Peers map[string]rcmgr.BaseLimit `json:",omitempty"` } -func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { +func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStatOut, error) { var err error var result NetStatOut switch { @@ -139,22 +141,42 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { return result, ErrNoResourceMgr } + limits, err := NetLimitAll(mgr) + if err != nil { + return result, err + } + stat := rapi.Stat() - result.System = &stat.System - result.Transient = &stat.Transient + result.System = compareLimits(scopeToLimit(&stat.System), limits.System, percentage) + result.Transient = compareLimits(scopeToLimit(&stat.Transient), limits.Transient, percentage) if len(stat.Services) > 0 { - result.Services = stat.Services + result.Services = make(map[string]rcmgr.BaseLimit, len(stat.Services)) + for srv, stat := range stat.Services { + ls := limits.Services[srv] + fstat := compareLimits(scopeToLimit(&stat), &ls, percentage) + if fstat != nil { + result.Services[srv] = *fstat + } + } } if len(stat.Protocols) > 0 { - result.Protocols = make(map[string]network.ScopeStat, len(stat.Protocols)) + result.Protocols = make(map[string]rcmgr.BaseLimit, len(stat.Protocols)) for proto, stat := range stat.Protocols { - result.Protocols[string(proto)] = stat + ls := limits.Protocols[string(proto)] + fstat := compareLimits(scopeToLimit(&stat), &ls, percentage) + if fstat != nil { + result.Protocols[string(proto)] = *fstat + } } } if len(stat.Peers) > 0 { - result.Peers = make(map[string]network.ScopeStat, len(stat.Peers)) + result.Peers = make(map[string]rcmgr.BaseLimit, len(stat.Peers)) for p, stat := range stat.Peers { - result.Peers[p.Pretty()] = stat + ls := limits.Peers[p.Pretty()] + fstat := compareLimits(scopeToLimit(&stat), &ls, percentage) + if fstat != nil { + result.Peers[p.Pretty()] = *fstat + } } } @@ -163,7 +185,7 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { case scope == config.ResourceMgrSystemScope: err = mgr.ViewSystem(func(s network.ResourceScope) error { stat := s.Stat() - result.System = &stat + result.System = scopeToLimit(&stat) return nil }) return result, err @@ -171,7 +193,7 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { case scope == config.ResourceMgrTransientScope: err = mgr.ViewTransient(func(s network.ResourceScope) error { stat := s.Stat() - result.Transient = &stat + result.Transient = scopeToLimit(&stat) return nil }) return result, err @@ -180,8 +202,8 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { svc := strings.TrimPrefix(scope, config.ResourceMgrServiceScopePrefix) err = mgr.ViewService(svc, func(s network.ServiceScope) error { stat := s.Stat() - result.Services = map[string]network.ScopeStat{ - svc: stat, + result.Services = map[string]rcmgr.BaseLimit{ + svc: *scopeToLimit(&stat), } return nil }) @@ -191,8 +213,8 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { proto := strings.TrimPrefix(scope, config.ResourceMgrProtocolScopePrefix) err = mgr.ViewProtocol(protocol.ID(proto), func(s network.ProtocolScope) error { stat := s.Stat() - result.Protocols = map[string]network.ScopeStat{ - proto: stat, + result.Protocols = 
map[string]rcmgr.BaseLimit{ + proto: *scopeToLimit(&stat), } return nil }) @@ -206,8 +228,8 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { } err = mgr.ViewPeer(pid, func(s network.PeerScope) error { stat := s.Stat() - result.Peers = map[string]network.ScopeStat{ - p: stat, + result.Peers = map[string]rcmgr.BaseLimit{ + p: *scopeToLimit(&stat), } return nil }) @@ -218,6 +240,130 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { } } +var scopes = []string{ + config.ResourceMgrSystemScope, + config.ResourceMgrTransientScope, + config.ResourceMgrServiceScopePrefix, + config.ResourceMgrProtocolScopePrefix, + config.ResourceMgrPeerScopePrefix, +} + +func scopeToLimit(s *network.ScopeStat) *rcmgr.BaseLimit { + return &rcmgr.BaseLimit{ + Streams: s.NumStreamsInbound + s.NumStreamsOutbound, + StreamsInbound: s.NumStreamsInbound, + StreamsOutbound: s.NumStreamsOutbound, + Conns: s.NumConnsInbound + s.NumConnsOutbound, + ConnsInbound: s.NumConnsInbound, + ConnsOutbound: s.NumConnsOutbound, + FD: s.NumFD, + Memory: s.Memory, + } +} + +// compareLimits compares stat and limit. +// If any of the stat values is equal to or above the specified percentage of its limit, +// the stat object is returned. +func compareLimits(stat, limit *rcmgr.BaseLimit, percentage int) *rcmgr.BaseLimit { + if stat == nil || limit == nil { + return nil + } + if abovePercentage(int(stat.Memory), int(limit.Memory), percentage) { + return stat + } + if abovePercentage(stat.ConnsInbound, limit.ConnsInbound, percentage) { + return stat + } + if abovePercentage(stat.ConnsOutbound, limit.ConnsOutbound, percentage) { + return stat + } + if abovePercentage(stat.Conns, limit.Conns, percentage) { + return stat + } + if abovePercentage(stat.FD, limit.FD, percentage) { + return stat + } + if abovePercentage(stat.StreamsInbound, limit.StreamsInbound, percentage) { + return stat + } + if abovePercentage(stat.StreamsOutbound, limit.StreamsOutbound, percentage) { + return stat + } + if abovePercentage(stat.Streams, limit.Streams, percentage) { + return stat + } + + return nil +} + +func abovePercentage(v1, v2, percentage int) bool { + if percentage == 0 { + return true + } + + if v2 == 0 { + return false + } + + return int((float64(v1)/float64(v2))*100) >= percentage +} + +func NetLimitAll(mgr network.ResourceManager) (*NetStatOut, error) { + var result = &NetStatOut{} + lister, ok := mgr.(rcmgr.ResourceManagerState) + if !ok { // NullResourceManager + return result, ErrNoResourceMgr + } + + for _, s := range scopes { + switch s { + case config.ResourceMgrSystemScope: + s, err := NetLimit(mgr, config.ResourceMgrSystemScope) + if err != nil { + return nil, err + } + result.System = &s + case config.ResourceMgrTransientScope: + s, err := NetLimit(mgr, config.ResourceMgrTransientScope) + if err != nil { + return nil, err + } + result.Transient = &s + case config.ResourceMgrServiceScopePrefix: + result.Services = make(map[string]rcmgr.BaseLimit) + for _, serv := range lister.ListServices() { + s, err := NetLimit(mgr, config.ResourceMgrServiceScopePrefix+serv) + if err != nil { + return nil, err + } + result.Services[serv] = s + } + case config.ResourceMgrProtocolScopePrefix: + result.Protocols = make(map[string]rcmgr.BaseLimit) + for _, prot := range lister.ListProtocols() { + ps := string(prot) + s, err := NetLimit(mgr, config.ResourceMgrProtocolScopePrefix+ps) + if err != nil { + return nil, err + } + result.Protocols[ps] = s + } + case config.ResourceMgrPeerScopePrefix: + result.Peers = make(map[string]rcmgr.BaseLimit)
+ for _, peer := range lister.ListPeers() { + ps := peer.Pretty() + s, err := NetLimit(mgr, config.ResourceMgrPeerScopePrefix+ps) + if err != nil { + return nil, err + } + result.Peers[ps] = s + } + } + } + + return result, nil +} + func NetLimit(mgr network.ResourceManager, scope string) (rcmgr.BaseLimit, error) { var result rcmgr.BaseLimit getLimit := func(s network.ResourceScope) error { @@ -367,7 +513,10 @@ func NetResetLimit(mgr network.ResourceManager, repo repo.Repo, scope string) (r return result, fmt.Errorf("reading config to reset limit: %w", err) } - defaults := adjustedDefaultLimits(cfg.Swarm) + defaults, err := createDefaultLimitConfig(cfg.Swarm) + if err != nil { + return result, fmt.Errorf("creating default limit config: %w", err) + } if cfg.Swarm.ResourceMgr.Limits == nil { cfg.Swarm.ResourceMgr.Limits = &rcmgr.LimitConfig{} diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index 8d77e56aa06..3ff8b55dd26 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -1,832 +1,208 @@ package libp2p import ( - "encoding/json" - "fmt" - "math/bits" - "os" - "strings" + "math" - "github.com/ipfs/kubo/config" + "github.com/dustin/go-humanize" "github.com/libp2p/go-libp2p" rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" + "github.com/pbnjay/memory" - "github.com/wI2L/jsondiff" + "github.com/ipfs/kubo/config" + "github.com/ipfs/kubo/core/node/libp2p/fd" ) -// This file defines implicit limit defaults used when Swarm.ResourceMgr.Enabled - -// adjustedDefaultLimits allows for tweaking defaults based on external factors, -// such as values in Swarm.ConnMgr.HiWater config. -func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { - // Run checks to avoid introducing regressions - if os.Getenv("IPFS_CHECK_RCMGR_DEFAULTS") != "" { - checkImplicitDefaults() - } - defaultLimits := rcmgr.DefaultLimits - libp2p.SetDefaultServiceLimits(&defaultLimits) - - // Adjust limits - // (based on https://github.com/filecoin-project/lotus/pull/8318/files) - // - if Swarm.ConnMgr.HighWater is too high, adjust Conn/FD/Stream limits - - // Outbound conns and FDs are set very high to allow for the accelerated DHT client to (re)load its routing table. - // Currently it doesn't gracefully handle RM throttling--once it does we can lower these. - // High outbound conn limits are considered less of a DoS risk than high inbound conn limits. - // Also note that, due to the behavior of the accelerated DHT client, we don't need many streams, just conns. - if minOutbound := 65536; defaultLimits.SystemBaseLimit.ConnsOutbound < minOutbound { - defaultLimits.SystemBaseLimit.ConnsOutbound = minOutbound - } - if minFD := 4096; defaultLimits.SystemBaseLimit.FD < minFD { - defaultLimits.SystemBaseLimit.FD = minFD - } - defaultLimitConfig := defaultLimits.AutoScale() - - // Do we need to adjust due to Swarm.ConnMgr.HighWater? 
- if cfg.ConnMgr.Type == "basic" { - maxconns := cfg.ConnMgr.HighWater - if 2*maxconns > defaultLimitConfig.System.ConnsInbound { - // adjust conns to 2x to allow for two conns per peer (TCP+QUIC) - defaultLimitConfig.System.ConnsInbound = logScale(2 * maxconns) - defaultLimitConfig.System.ConnsOutbound = logScale(2 * maxconns) - defaultLimitConfig.System.Conns = logScale(4 * maxconns) - - defaultLimitConfig.System.StreamsInbound = logScale(16 * maxconns) - defaultLimitConfig.System.StreamsOutbound = logScale(64 * maxconns) - defaultLimitConfig.System.Streams = logScale(64 * maxconns) - - if 2*maxconns > defaultLimitConfig.System.FD { - defaultLimitConfig.System.FD = logScale(2 * maxconns) - } - - defaultLimitConfig.ServiceDefault.StreamsInbound = logScale(8 * maxconns) - defaultLimitConfig.ServiceDefault.StreamsOutbound = logScale(32 * maxconns) - defaultLimitConfig.ServiceDefault.Streams = logScale(32 * maxconns) - - defaultLimitConfig.ProtocolDefault.StreamsInbound = logScale(8 * maxconns) - defaultLimitConfig.ProtocolDefault.StreamsOutbound = logScale(32 * maxconns) - defaultLimitConfig.ProtocolDefault.Streams = logScale(32 * maxconns) - - log.Info("adjusted default resource manager limits") - } - - } - - return defaultLimitConfig +// We are doing some magic when parsing config files (we are using a map[string]interface{} to compare config files). +// When you don't have a type the JSON Parse function cast numbers to float64 by default, +// losing precision when writing the final number. So if we use math.MaxInt as our infinite number, +// after writing the config file we will have 9223372036854776000 instead of 9223372036854775807, +// making the parsing process fail. +const bigEnough = math.MaxInt / 2 + +var infiniteBaseLimit = rcmgr.BaseLimit{ + Streams: bigEnough, + StreamsInbound: bigEnough, + StreamsOutbound: bigEnough, + Conns: bigEnough, + ConnsInbound: bigEnough, + ConnsOutbound: bigEnough, + FD: bigEnough, + Memory: bigEnough, } -func logScale(val int) int { - bitlen := bits.Len(uint(val)) - return 1 << bitlen +var noLimitIncrease = rcmgr.BaseLimitIncrease{ + ConnsInbound: 0, + ConnsOutbound: 0, + Conns: 0, + StreamsInbound: 0, + StreamsOutbound: 0, + Streams: 0, + Memory: 0, + FDFraction: 0, } -// checkImplicitDefaults compares libp2p defaults agains expected ones -// and panics when they don't match. This ensures we are not surprised -// by silent default limit changes when we update go-libp2p dependencies. -func checkImplicitDefaults() { - ok := true +// This file defines implicit limit defaults used when Swarm.ResourceMgr.Enabled - // Check 1: did go-libp2p-resource-manager's DefaultLimits change? - defaults, err := json.Marshal(rcmgr.DefaultLimits) +// createDefaultLimitConfig creates LimitConfig to pass to libp2p's resource manager. +// libp2p's resource manager provides tremendous flexibility but also adds a lot of complexity. +// The intent of the default config here is to provide good defaults, +// and where the defaults aren't good enough, +// to expose a good set of higher-level "knobs" to users to satisfy most use cases +// without requiring users to wade into all the intricacies of libp2p's resource manager. +// +// The inputs one can specify in SwarmConfig are: +// - cfg.ResourceMgr.MaxMemory: This is the max amount of memory in bytes to allow libp2p to use. +// libp2p's resource manager will prevent additional resource creation while this limit is hit. +// If this value isn't specified, 1/8th of the total system memory is used. 
+// - cfg.ResourceMgr.MaxFileDescriptors: This is the maximum number of file descriptors to allow libp2p to use. +// libp2p's resource manager will prevent additional file descriptor consumption while this limit is hit. +// If this value isn't specified, the maximum between 1/2 of system FD limit and 4096 is used. +// - Swarm.ConnMgr.HighWater: If a connection manager is specified, libp2p's resource manager +// will allow 2x more connections than the HighWater mark +// so the connection manager has "space and time" to close "least useful" connections. +// +// With these inputs defined, limits are created at the system, transient, and peer scopes. +// Other scopes are ignored (by being set to infinity). +// The reason these scopes are chosen is because: +// - system - This gives us the coarse-grained control we want so we can reason about the system as a whole. +// It is the backstop, and allows us to reason about resource consumption more easily +// since don't have think about the interaction of many other scopes. +// - transient - Limiting connections that are in process of being established provides backpressure so not too much work queues up. +// - peer - The peer scope doesn't protect us against intentional DoS attacks. +// It's just as easy for an attacker to send 100 requests/second with 1 peerId vs. 10 requests/second with 10 peers. +// We are reliant on the system scope for protection here in the malicious case. +// The reason for having a peer scope is to protect against unintentional DoS attacks +// (e.g., bug in a peer which is causing it to "misbehave"). +// In the unintional case, we want to make sure a "misbehaving" node doesn't consume more resources than necessary. +// +// Within these scopes, limits are just set on memory, FD, and inbound connections/streams. +// Limits are set based on the inputs above. +// We trust this node to behave properly and thus ignore outbound connection/stream limits. +// We apply any limits that libp2p has for its protocols/services +// since we assume libp2p knows best here. +// +// This leaves 3 levels of resource management protection: +// 1. The user who does nothing and uses defaults - In this case they get some sane defaults +// based on the amount of memory and file descriptors their system has. +// This should protect the node from many attacks. +// 2. Slightly more advanced user - They can tweak the above by passing in config on +// maxMemory, maxFD, or maxConns with Swarm.HighWater.ConnMgr. +// 3. Power user - They specify all the limits they want set via Swarm.ResourceMgr.Limits +// and we don't do any defaults/overrides. We pass that config blindly into libp2p resource manager. +func createDefaultLimitConfig(cfg config.SwarmConfig) (rcmgr.LimitConfig, error) { + maxMemoryDefaultString := humanize.Bytes(uint64(memory.TotalMemory()) / 8) + maxMemoryString := cfg.ResourceMgr.MaxMemory.WithDefault(maxMemoryDefaultString) + maxMemory, err := humanize.ParseBytes(maxMemoryString) if err != nil { - log.Fatal(err) - } - changes, err := jsonDiff([]byte(expectedDefaultLimits), defaults) - if err != nil { - log.Fatal(err) - } - if len(changes) > 0 { - ok = false - log.Errorf("===> OOF! 
go-libp2p-resource-manager changed DefaultLimits\n"+ - "=> changes ('test' represents the old value):\n%s\n"+ - "=> go-libp2p-resource-manager DefaultLimits update needs a review:\n"+ - "Please inspect if changes impact go-ipfs users, and update expectedDefaultLimits in rcmgr_defaults.go to remove this message", - strings.Join(changes, "\n"), - ) + return rcmgr.LimitConfig{}, err } - // Check 2: did go-libp2p's SetDefaultServiceLimits change? - // We compare the baseline (min specs), and check if we went down in any limits. - l := rcmgr.DefaultLimits - libp2p.SetDefaultServiceLimits(&l) - limits := l.AutoScale() - testLimiter := rcmgr.NewFixedLimiter(limits) - - serviceDefaults, err := json.Marshal(testLimiter) - if err != nil { - log.Fatal(err) - } - changes, err = jsonDiff([]byte(expectedDefaultServiceLimits), serviceDefaults) - if err != nil { - log.Fatal(err) + numFD := cfg.ResourceMgr.MaxFileDescriptors.WithDefault(int64(fd.GetNumFDs()) / 2) + + scalingLimitConfig := rcmgr.ScalingLimitConfig{ + SystemBaseLimit: rcmgr.BaseLimit{ + Memory: int64(maxMemory), + FD: int(numFD), + + // By default, we just limit connections on the inbound side. + // Note that the limit gets adjusted below if "cfg.ConnMgr.HighWater" is set. + Conns: bigEnough, + ConnsInbound: rcmgr.DefaultLimits.SystemBaseLimit.ConnsInbound, // same as libp2p default + ConnsOutbound: bigEnough, + + // We limit streams since they not only take up memory and CPU. + // The Memory limit protects us on the memory side, + // but a StreamsInbound limit helps protect against unbound CPU consumption from stream processing. + Streams: bigEnough, + StreamsInbound: rcmgr.DefaultLimits.SystemBaseLimit.StreamsInbound, + StreamsOutbound: bigEnough, + }, + // Most limits don't see an increase because they're already infinite/bigEnough or at their max value. + // The values that should scale based on the amount of memory allocated to libp2p need to increase accordingly. + SystemLimitIncrease: rcmgr.BaseLimitIncrease{ + Memory: rcmgr.DefaultLimits.SystemLimitIncrease.Memory, + FDFraction: rcmgr.DefaultLimits.SystemLimitIncrease.FDFraction, + + Conns: 0, + ConnsInbound: rcmgr.DefaultLimits.SystemLimitIncrease.ConnsInbound, + ConnsOutbound: 0, + + Streams: 0, + StreamsInbound: rcmgr.DefaultLimits.SystemLimitIncrease.StreamsInbound, + StreamsOutbound: 0, + }, + + // Just go with what libp2p does + TransientBaseLimit: rcmgr.DefaultLimits.TransientBaseLimit, + TransientLimitIncrease: rcmgr.DefaultLimits.TransientLimitIncrease, + + // Lets get out of the way of the allow list functionality. + // If someone specified "Swarm.ResourceMgr.Allowlist" we should let it go through. + AllowlistedSystemBaseLimit: infiniteBaseLimit, + AllowlistedSystemLimitIncrease: noLimitIncrease, + + AllowlistedTransientBaseLimit: infiniteBaseLimit, + AllowlistedTransientLimitIncrease: noLimitIncrease, + + // Keep it simple by not having Service, ServicePeer, Protocol, ProtocolPeer, Conn, or Stream limits. + ServiceBaseLimit: infiniteBaseLimit, + ServiceLimitIncrease: noLimitIncrease, + + ServicePeerBaseLimit: infiniteBaseLimit, + ServicePeerLimitIncrease: noLimitIncrease, + + ProtocolBaseLimit: infiniteBaseLimit, + ProtocolLimitIncrease: noLimitIncrease, + + ProtocolPeerBaseLimit: infiniteBaseLimit, + ProtocolPeerLimitIncrease: noLimitIncrease, + + ConnBaseLimit: infiniteBaseLimit, + ConnLimitIncrease: noLimitIncrease, + + StreamBaseLimit: infiniteBaseLimit, + StreamLimitIncrease: noLimitIncrease, + + // Limit the resources consumed by a peer. 
+ // This doesn't protect us against intentional DoS attacks since an attacker can easily spin up multiple peers. + // We specify this limit against unintentional DoS attacks (e.g., a peer has a bug and is sending too much traffic intentionally). + // In that case we want to keep that peer's resource consumption contained. + // To keep this simple, we only constrain inbound connections and streams. + PeerBaseLimit: rcmgr.BaseLimit{ + Memory: bigEnough, + FD: bigEnough, + Conns: bigEnough, + ConnsInbound: rcmgr.DefaultLimits.PeerBaseLimit.ConnsInbound, + ConnsOutbound: bigEnough, + Streams: bigEnough, + StreamsInbound: rcmgr.DefaultLimits.PeerBaseLimit.StreamsInbound, + StreamsOutbound: bigEnough, + }, + // Most limits don't see an increase because they're already infinite/bigEnough. + // The values that should scale based on the amount of memory allocated to libp2p need to increase accordingly. + PeerLimitIncrease: rcmgr.BaseLimitIncrease{ + Memory: 0, + FDFraction: 0, + Conns: 0, + ConnsInbound: rcmgr.DefaultLimits.PeerLimitIncrease.ConnsInbound, + ConnsOutbound: 0, + Streams: 0, + StreamsInbound: rcmgr.DefaultLimits.PeerLimitIncrease.StreamsInbound, + StreamsOutbound: 0, + }, } - if len(changes) > 0 { - oldState := map[string]int{} - type Op struct { - Op string - Path string - Value int - } - for _, changeStr := range changes { - change := Op{} - err := json.Unmarshal([]byte(changeStr), &change) - if err != nil { - continue - } - if change.Op == "test" { - oldState[change.Path] = change.Value - } - } - for _, changeStr := range changes { - change := Op{} - err := json.Unmarshal([]byte(changeStr), &change) - if err != nil { - continue - } - if change.Op == "replace" { - oldVal, okFound := oldState[change.Path] - if okFound && oldVal > change.Value { - ok = false - fmt.Printf("reduced value for %s. Old: %v; new: %v\n", change.Path, oldVal, change.Value) - } - } - } + // Whatever limits libp2p has specifically tuned for its protocols/services we'll apply. + libp2p.SetDefaultServiceLimits(&scalingLimitConfig) - if !ok { - log.Errorf("===> OOF! go-libp2p changed DefaultServiceLimits\n" + - "=> See the aboce reduced values for info.\n" + - "=> go-libp2p SetDefaultServiceLimits update needs a review:\n" + - "Please inspect if changes impact go-ipfs users, and update expectedDefaultServiceLimits in rcmgr_defaults.go to remove this message", - ) - } - } - if !ok { - log.Fatal("daemon will refuse to run with the resource manager until this is resolved") - } -} + defaultLimitConfig := scalingLimitConfig.Scale(int64(maxMemory), int(numFD)) -// jsonDiff compares two strings and returns diff in JSON Patch format -func jsonDiff(old []byte, updated []byte) ([]string, error) { - // generate 'invertible' patch which includes old values as "test" op - patch, err := jsondiff.CompareJSONOpts(old, updated, jsondiff.Invertible()) - changes := make([]string, len(patch)) - if err != nil { - return changes, err - } - for i, op := range patch { - changes[i] = fmt.Sprintf(" %s", op) + // If a high water mark is set: + if cfg.ConnMgr.Type == "basic" { + // set the connection limit higher than high water mark so that the ConnMgr has "space and time" to close "least useful" connections. 
+ defaultLimitConfig.System.Conns = 2 * cfg.ConnMgr.HighWater + log.Info("adjusted default resource manager System.Conns limits to match ConnMgr.HighWater value of %s", cfg.ConnMgr.HighWater) } - return changes, nil -} -// https://github.com/libp2p/go-libp2p/blob/v0.22.0/p2p/host/resource-manager/limit_defaults.go#L343 -const expectedDefaultLimits = `{ - "SystemBaseLimit": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "FD": 256, - "Memory": 134217728 - }, - "SystemLimitIncrease": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "Memory": 1073741824, - "FDFraction": 1 - }, - "TransientBaseLimit": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 64, - "ConnsInbound": 32, - "ConnsOutbound": 64, - "FD": 64, - "Memory": 33554432 - }, - "TransientLimitIncrease": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 32, - "ConnsInbound": 16, - "ConnsOutbound": 32, - "Memory": 134217728, - "FDFraction": 0.25 - }, - "AllowlistedSystemBaseLimit": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "FD": 256, - "Memory": 134217728 - }, - "AllowlistedSystemLimitIncrease": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "Memory": 1073741824, - "FDFraction": 1 - }, - "AllowlistedTransientBaseLimit": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 64, - "ConnsInbound": 32, - "ConnsOutbound": 64, - "FD": 64, - "Memory": 33554432 - }, - "AllowlistedTransientLimitIncrease": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 32, - "ConnsInbound": 16, - "ConnsOutbound": 32, - "Memory": 134217728, - "FDFraction": 0.25 - }, - "ServiceBaseLimit": { - "Streams": 4096, - "StreamsInbound": 1024, - "StreamsOutbound": 4096, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 67108864 - }, - "ServiceLimitIncrease": { - "Streams": 2048, - "StreamsInbound": 512, - "StreamsOutbound": 2048, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 134217728, - "FDFraction": 0 - }, - "ServiceLimits": null, - "ServicePeerBaseLimit": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "ServicePeerLimitIncrease": { - "Streams": 8, - "StreamsInbound": 4, - "StreamsOutbound": 8, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 4194304, - "FDFraction": 0 - }, - "ServicePeerLimits": null, - "ProtocolBaseLimit": { - "Streams": 2048, - "StreamsInbound": 512, - "StreamsOutbound": 2048, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 67108864 - }, - "ProtocolLimitIncrease": { - "Streams": 512, - "StreamsInbound": 256, - "StreamsOutbound": 512, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 171966464, - "FDFraction": 0 - }, - "ProtocolLimits": null, - "ProtocolPeerBaseLimit": { - "Streams": 256, - "StreamsInbound": 64, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "ProtocolPeerLimitIncrease": { - "Streams": 16, - "StreamsInbound": 4, - "StreamsOutbound": 8, - 
"Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 4, - "FDFraction": 0 - }, - "ProtocolPeerLimits": null, - "PeerBaseLimit": { - "Streams": 512, - "StreamsInbound": 256, - "StreamsOutbound": 512, - "Conns": 8, - "ConnsInbound": 4, - "ConnsOutbound": 8, - "FD": 4, - "Memory": 67108864 - }, - "PeerLimitIncrease": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 134217728, - "FDFraction": 0.015625 - }, - "PeerLimits": null, - "ConnBaseLimit": { - "Streams": 0, - "StreamsInbound": 0, - "StreamsOutbound": 0, - "Conns": 1, - "ConnsInbound": 1, - "ConnsOutbound": 1, - "FD": 1, - "Memory": 33554432 - }, - "ConnLimitIncrease": { - "Streams": 0, - "StreamsInbound": 0, - "StreamsOutbound": 0, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 0, - "FDFraction": 0 - }, - "StreamBaseLimit": { - "Streams": 1, - "StreamsInbound": 1, - "StreamsOutbound": 1, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "StreamLimitIncrease": { - "Streams": 0, - "StreamsInbound": 0, - "StreamsOutbound": 0, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 0, - "FDFraction": 0 - } -}` - -// Generated from the default limits and scaling to 0 (base limit). -const expectedDefaultServiceLimits = `{ - "System": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "FD": 256, - "Memory": 134217728 - }, - "Transient": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 64, - "ConnsInbound": 32, - "ConnsOutbound": 64, - "FD": 64, - "Memory": 33554432 - }, - "AllowlistedSystem": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "FD": 256, - "Memory": 134217728 - }, - "AllowlistedTransient": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 64, - "ConnsInbound": 32, - "ConnsOutbound": 64, - "FD": 64, - "Memory": 33554432 - }, - "ServiceDefault": { - "Streams": 4096, - "StreamsInbound": 1024, - "StreamsOutbound": 4096, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 67108864 - }, - "Service": { - "libp2p.autonat": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "libp2p.holepunch": { - "Streams": 64, - "StreamsInbound": 32, - "StreamsOutbound": 32, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "libp2p.identify": { - "Streams": 128, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "libp2p.ping": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "libp2p.relay/v1": { - "Streams": 256, - "StreamsInbound": 256, - "StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "libp2p.relay/v2": { - "Streams": 256, - "StreamsInbound": 256, - "StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - } - }, - "ServicePeerDefault": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 0, - 
"ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "ServicePeer": { - "libp2p.autonat": { - "Streams": 2, - "StreamsInbound": 2, - "StreamsOutbound": 2, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "libp2p.holepunch": { - "Streams": 2, - "StreamsInbound": 2, - "StreamsOutbound": 2, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "libp2p.identify": { - "Streams": 32, - "StreamsInbound": 16, - "StreamsOutbound": 16, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "libp2p.ping": { - "Streams": 4, - "StreamsInbound": 2, - "StreamsOutbound": 3, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - }, - "libp2p.relay/v1": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "libp2p.relay/v2": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - } - }, - "ProtocolDefault": { - "Streams": 2048, - "StreamsInbound": 512, - "StreamsOutbound": 2048, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 67108864 - }, - "Protocol": { - "/ipfs/id/1.0.0": { - "Streams": 128, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/ipfs/id/push/1.0.0": { - "Streams": 128, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/ipfs/ping/1.0.0": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/libp2p/autonat/1.0.0": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/libp2p/circuit/relay/0.1.0": { - "Streams": 640, - "StreamsInbound": 640, - "StreamsOutbound": 640, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "/libp2p/circuit/relay/0.2.0/hop": { - "Streams": 640, - "StreamsInbound": 640, - "StreamsOutbound": 640, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "/libp2p/circuit/relay/0.2.0/stop": { - "Streams": 640, - "StreamsInbound": 640, - "StreamsOutbound": 640, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "/libp2p/dcutr": { - "Streams": 64, - "StreamsInbound": 32, - "StreamsOutbound": 32, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/p2p/id/delta/1.0.0": { - "Streams": 128, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - } - }, - "ProtocolPeerDefault": { - "Streams": 256, - "StreamsInbound": 64, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "ProtocolPeer": { - "/ipfs/id/1.0.0": { - "Streams": 32, - "StreamsInbound": 16, - "StreamsOutbound": 16, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - }, - "/ipfs/id/push/1.0.0": { - "Streams": 32, - 
"StreamsInbound": 16, - "StreamsOutbound": 16, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - }, - "/ipfs/ping/1.0.0": { - "Streams": 4, - "StreamsInbound": 2, - "StreamsOutbound": 3, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - }, - "/libp2p/autonat/1.0.0": { - "Streams": 2, - "StreamsInbound": 2, - "StreamsOutbound": 2, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "/libp2p/circuit/relay/0.1.0": { - "Streams": 128, - "StreamsInbound": 128, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 33554432 - }, - "/libp2p/circuit/relay/0.2.0/hop": { - "Streams": 128, - "StreamsInbound": 128, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 33554432 - }, - "/libp2p/circuit/relay/0.2.0/stop": { - "Streams": 128, - "StreamsInbound": 128, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 33554432 - }, - "/libp2p/dcutr": { - "Streams": 2, - "StreamsInbound": 2, - "StreamsOutbound": 2, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "/p2p/id/delta/1.0.0": { - "Streams": 32, - "StreamsInbound": 16, - "StreamsOutbound": 16, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - } - }, - "PeerDefault": { - "Streams": 512, - "StreamsInbound": 256, - "StreamsOutbound": 512, - "Conns": 8, - "ConnsInbound": 4, - "ConnsOutbound": 8, - "FD": 4, - "Memory": 67108864 - }, - "Conn": { - "Streams": 0, - "StreamsInbound": 0, - "StreamsOutbound": 0, - "Conns": 1, - "ConnsInbound": 1, - "ConnsOutbound": 1, - "FD": 1, - "Memory": 1048576 - }, - "Stream": { - "Streams": 1, - "StreamsInbound": 1, - "StreamsOutbound": 1, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - } -}` + return defaultLimitConfig, nil +} diff --git a/core/node/libp2p/rcmgr_logging.go b/core/node/libp2p/rcmgr_logging.go index 4b50cdc2ef9..34742dd1d7f 100644 --- a/core/node/libp2p/rcmgr_logging.go +++ b/core/node/libp2p/rcmgr_logging.go @@ -22,7 +22,7 @@ type loggingResourceManager struct { logInterval time.Duration mut sync.Mutex - limitExceededErrs uint64 + limitExceededErrs map[string]int } type loggingScope struct { @@ -47,11 +47,17 @@ func (n *loggingResourceManager) start(ctx context.Context) { case <-ticker.C: n.mut.Lock() errs := n.limitExceededErrs - n.limitExceededErrs = 0 - n.mut.Unlock() - if errs != 0 { - n.logger.Warnf("Resource limits were exceeded %d times, consider inspecting logs and raising the resource manager limits.", errs) + n.limitExceededErrs = make(map[string]int) + + for e, count := range errs { + n.logger.Errorf("Resource limits were exceeded %d times with error %q.", count, e) } + + if len(errs) != 0 { + n.logger.Errorf("Consider inspecting logs and raising the resource manager limits. 
Documentation: https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmresourcemgr") + } + + n.mut.Unlock() case <-ctx.Done(): return } @@ -62,7 +68,16 @@ func (n *loggingResourceManager) start(ctx context.Context) { func (n *loggingResourceManager) countErrs(err error) { if errors.Is(err, network.ErrResourceLimitExceeded) { n.mut.Lock() - n.limitExceededErrs++ + if n.limitExceededErrs == nil { + n.limitExceededErrs = make(map[string]int) + } + + // we need to unwrap the error to get the limit scope and the kind of reached limit + eout := errors.Unwrap(err) + if eout != nil { + n.limitExceededErrs[eout.Error()]++ + } + n.mut.Unlock() } } diff --git a/core/node/libp2p/rcmgr_logging_test.go b/core/node/libp2p/rcmgr_logging_test.go index 1dfad73afcc..3521e5314ea 100644 --- a/core/node/libp2p/rcmgr_logging_test.go +++ b/core/node/libp2p/rcmgr_logging_test.go @@ -55,7 +55,7 @@ func TestLoggingResourceManager(t *testing.T) { if oLogs.Len() == 0 { continue } - require.Equal(t, "Resource limits were exceeded 2 times, consider inspecting logs and raising the resource manager limits.", oLogs.All()[0].Message) + require.Equal(t, "Resource limits were exceeded 2 times with error \"system: cannot reserve inbound connection: resource limit exceeded\".", oLogs.All()[0].Message) return } } diff --git a/docs/config.md b/docs/config.md index 844109a799b..af7e7c672e9 100644 --- a/docs/config.md +++ b/docs/config.md @@ -141,6 +141,8 @@ config file at runtime. - [`Swarm.ConnMgr.GracePeriod`](#swarmconnmgrgraceperiod) - [`Swarm.ResourceMgr`](#swarmresourcemgr) - [`Swarm.ResourceMgr.Enabled`](#swarmresourcemgrenabled) + - [`Swarm.ResourceMgr.MaxMemory`](#swarmresourcemgrmaxmemory) + - [`Swarm.ResourceMgr.MaxFileDescriptors`](#swarmresourcemgrmaxfiledescriptors) - [`Swarm.ResourceMgr.Limits`](#swarmresourcemgrlimits) - [`Swarm.ResourceMgr.Allowlist`](#swarmresourcemgrallowlist) - [`Swarm.Transports`](#swarmtransports) @@ -1800,35 +1802,42 @@ Type: `duration` ### `Swarm.ResourceMgr` -**EXPERIMENTAL: `Swarm.ResourceMgr` configuration will change in future release** - The [libp2p Network Resource Manager](https://github.com/libp2p/go-libp2p-resource-manager#readme) allows setting limits per a scope, and tracking recource usage over time. #### `Swarm.ResourceMgr.Enabled` -**EXPERIMENTAL: `Swarm.ResourceMgr` is in active development, enable it only if you want to provide maintainers with feedback** - - Enables the libp2p Network Resource Manager and auguments the default limits using user-defined ones in `Swarm.ResourceMgr.Limits` (if present). Various `*rcmgr_*` metrics can be accessed as the prometheus endpoint at `{Addresses.API}/debug/metrics/prometheus` (default: `http://127.0.0.1:5001/debug/metrics/prometheus`) -Default: `false` +Default: `true` Type: `flag` -#### `Swarm.ResourceMgr.Limits` +#### `Swarm.ResourceMgr.MaxMemory` + +The maximum amount of memory that the libp2p resource manager will allow. -**EXPERIMENTAL: `Swarm.ResourceMgr.Limits` configuration will change in future release, exposed here only for convenience** +Default: `[TOTAL_SYSTEM_MEMORY]/8` +Type: `optionalBytes` -Map of resource limits [per scope](https://github.com/libp2p/go-libp2p-resource-manager#resource-scopes). +#### `Swarm.ResourceMgr.MaxFileDescriptors` + +Define the maximum number of file descriptors that libp2p can use. + +This param is ignored on Windows. 
+ +Default `[TOTAL_SYSTEM_FILE_DESCRIPTORS]/2` +Type: `optionalInteger` + +#### `Swarm.ResourceMgr.Limits` -The map supports fields from [`BasicLimiterConfig`](https://github.com/libp2p/go-libp2p-resource-manager/blob/v0.3.0/limit_config.go#L165-L185) -struct from [go-libp2p-resource-manager](https://github.com/libp2p/go-libp2p-resource-manager#readme). +Map of resource limits [per scope](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#resource-scopes). -**Example: (format may change in future release)** +The map supports fields from [`ScalingLimitConfig`](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit_defaults.go#L21-L59) +struct from [go-libp2p-resource-manager](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#readme). ```json { @@ -1865,7 +1874,7 @@ Type: `object[string->object]` #### `Swarm.ResourceMgr.Allowlist` A list of multiaddrs that can bypass normal system limits (but are still limited by the allowlist scope). -Convenience config around [go-libp2p-resource-manager#Allowlist.Add](https://pkg.go.dev/github.com/libp2p/go-libp2p-resource-manager#Allowlist.Add). +Convenience config around [go-libp2p-resource-manager#Allowlist.Add](https://pkg.go.dev/github.com/libp2p/go-libp2p/p2p/host/resource-manager#Allowlist.Add). Default: `[]` diff --git a/docs/examples/kubo-as-a-library/go.mod b/docs/examples/kubo-as-a-library/go.mod index de0527efd8d..ace69ac94cf 100644 --- a/docs/examples/kubo-as-a-library/go.mod +++ b/docs/examples/kubo-as-a-library/go.mod @@ -178,10 +178,6 @@ require ( github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/stretchr/objx v0.4.0 // indirect github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 // indirect - github.com/tidwall/gjson v1.14.0 // indirect - github.com/tidwall/match v1.1.1 // indirect - github.com/tidwall/pretty v1.2.0 // indirect - github.com/wI2L/jsondiff v0.2.0 // indirect github.com/whyrusleeping/base32 v0.0.0-20170828182744-c30ac30633cc // indirect github.com/whyrusleeping/cbor-gen v0.0.0-20210219115102-f37d292932f2 // indirect github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f // indirect diff --git a/docs/examples/kubo-as-a-library/go.sum b/docs/examples/kubo-as-a-library/go.sum index 5345acee3b8..d774b0831c6 100644 --- a/docs/examples/kubo-as-a-library/go.sum +++ b/docs/examples/kubo-as-a-library/go.sum @@ -1533,12 +1533,6 @@ github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70 github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e/go.mod h1:XDKHRm5ThF8YJjx001LtgelzsoaEcvnA7lVWz9EeX3g= -github.com/tidwall/gjson v1.14.0 h1:6aeJ0bzojgWLa82gDQHcx3S0Lr/O51I9bJ5nv6JFx5w= -github.com/tidwall/gjson v1.14.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= -github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= -github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= -github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk= github.com/tmc/grpc-websocket-proxy 
v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ= @@ -1552,8 +1546,6 @@ github.com/urfave/cli/v2 v2.0.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2 github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= -github.com/wI2L/jsondiff v0.2.0 h1:dE00WemBa1uCjrzQUUTE/17I6m5qAaN0EMFOg2Ynr/k= -github.com/wI2L/jsondiff v0.2.0/go.mod h1:axTcwtBkY4TsKuV+RgoMhHyHKKFRI6nnjRLi8LLYQnA= github.com/wangjia184/sortedset v0.0.0-20160527075905-f5d03557ba30/go.mod h1:YkocrP2K2tcw938x9gCOmT5G5eCD6jsTz0SZuyAqwIE= github.com/warpfork/go-testmark v0.3.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= github.com/warpfork/go-testmark v0.9.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= diff --git a/go.mod b/go.mod index 43688f8bfe0..a24d78589d7 100644 --- a/go.mod +++ b/go.mod @@ -90,11 +90,11 @@ require ( github.com/multiformats/go-multicodec v0.7.0 github.com/multiformats/go-multihash v0.2.1 github.com/opentracing/opentracing-go v1.2.0 + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.13.0 github.com/stretchr/testify v1.8.0 github.com/syndtr/goleveldb v1.0.0 - github.com/wI2L/jsondiff v0.2.0 github.com/whyrusleeping/go-sysinfo v0.0.0-20190219211824-4a357d4b90b1 github.com/whyrusleeping/multiaddr-filter v0.0.0-20160516205228-e903e4adabd7 go.opencensus.io v0.23.0 @@ -210,7 +210,6 @@ require ( github.com/onsi/ginkgo v1.16.5 // indirect github.com/opencontainers/runtime-spec v1.0.2 // indirect github.com/openzipkin/zipkin-go v0.4.0 // indirect - github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/polydawn/refmt v0.0.0-20201211092308-30ac6d18308e // indirect github.com/prometheus/client_model v0.2.0 // indirect @@ -222,9 +221,6 @@ require ( github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e // indirect - github.com/tidwall/gjson v1.14.0 // indirect - github.com/tidwall/match v1.1.1 // indirect - github.com/tidwall/pretty v1.2.0 // indirect github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb // indirect github.com/whyrusleeping/base32 v0.0.0-20170828182744-c30ac30633cc // indirect github.com/whyrusleeping/cbor-gen v0.0.0-20210219115102-f37d292932f2 // indirect diff --git a/go.sum b/go.sum index 348263ffefe..bb54932a86f 100644 --- a/go.sum +++ b/go.sum @@ -1508,12 +1508,6 @@ github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpP github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e h1:T5PdfK/M1xyrHwynxMIVMWLS7f/qHwfslZphxtGnw7s= github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e/go.mod h1:XDKHRm5ThF8YJjx001LtgelzsoaEcvnA7lVWz9EeX3g= -github.com/tidwall/gjson v1.14.0 h1:6aeJ0bzojgWLa82gDQHcx3S0Lr/O51I9bJ5nv6JFx5w= -github.com/tidwall/gjson v1.14.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/match v1.1.1 
h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= -github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= -github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= -github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tj/assert v0.0.3 h1:Df/BlaZ20mq6kuai7f5z2TvPFiwC3xaWJSDQNiIS3Rk= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ= @@ -1528,8 +1522,6 @@ github.com/urfave/cli/v2 v2.0.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2 github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= -github.com/wI2L/jsondiff v0.2.0 h1:dE00WemBa1uCjrzQUUTE/17I6m5qAaN0EMFOg2Ynr/k= -github.com/wI2L/jsondiff v0.2.0/go.mod h1:axTcwtBkY4TsKuV+RgoMhHyHKKFRI6nnjRLi8LLYQnA= github.com/wangjia184/sortedset v0.0.0-20160527075905-f5d03557ba30/go.mod h1:YkocrP2K2tcw938x9gCOmT5G5eCD6jsTz0SZuyAqwIE= github.com/warpfork/go-testmark v0.3.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= github.com/warpfork/go-testmark v0.9.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= diff --git a/test/sharness/t0119-prometheus.sh b/test/sharness/t0119-prometheus.sh index cd734673571..e96b8b96f8a 100755 --- a/test/sharness/t0119-prometheus.sh +++ b/test/sharness/t0119-prometheus.sh @@ -10,6 +10,10 @@ test_description="Test prometheus metrics are exposed correctly" test_init_ipfs +test_expect_success "enable ResourceMgr in the config" ' + ipfs config --json Swarm.ResourceMgr.Enabled false +' + test_launch_ipfs_daemon test_expect_success "collect metrics" ' diff --git a/test/sharness/t0139-swarm-rcmgr.sh b/test/sharness/t0139-swarm-rcmgr.sh index 15b9a0089e4..ca63639577e 100755 --- a/test/sharness/t0139-swarm-rcmgr.sh +++ b/test/sharness/t0139-swarm-rcmgr.sh @@ -2,13 +2,15 @@ # test_description="Test ipfs swarm ResourceMgr config and commands" -export IPFS_CHECK_RCMGR_DEFAULTS=1 - . lib/test-lib.sh test_init_ipfs -# test correct behavior when resource manager is disabled (default behavior) +test_expect_success 'Disable resource manager' ' + ipfs config --bool Swarm.ResourceMgr.Enabled false +' + +# test correct behavior when resource manager is disabled test_launch_ipfs_daemon test_expect_success 'Swarm limit should fail since RM is disabled' ' @@ -79,11 +81,13 @@ test_expect_success 'connected: swarm stats all working properly' ' test_expect_success 'ResourceMgr enabled: swarm stats' ' ipfs swarm stats all --enc=json | tee json && jq -e .System.Memory < json && - jq -e .System.NumConnsInbound < json && - jq -e .System.NumConnsOutbound < json && - jq -e .System.NumFD < json && - jq -e .System.NumStreamsInbound < json && - jq -e .System.NumStreamsOutbound < json && + jq -e .System.FD < json && + jq -e .System.Conns < json && + jq -e .System.ConnsInbound < json && + jq -e .System.ConnsOutbound < json && + jq -e .System.Streams < json && + jq -e .System.StreamsInbound < json && + jq -e .System.StreamsOutbound < json && jq -e .Transient.Memory < json '
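To close the loop on the sharness test above, here is a rough sketch of how a client could consume the new `swarm stats all` JSON shape and the `min-used-limit-perc` filter added in this change. The `baseLimit` and `swarmStats` structs are local stand-ins that mirror only the fields the test asserts on (they are not the go-libp2p types), and the 90% threshold is just an example value.

```go
package main

import (
	"encoding/json"
	"fmt"
	"os/exec"
)

// baseLimit mirrors the BaseLimit-shaped fields produced by scopeToLimit
// and checked by the test above; it is a local helper type.
type baseLimit struct {
	Streams, StreamsInbound, StreamsOutbound int
	Conns, ConnsInbound, ConnsOutbound       int
	FD                                       int
	Memory                                   int64
}

// swarmStats mirrors NetStatOut as it appears on the wire.
type swarmStats struct {
	System    *baseLimit
	Transient *baseLimit
	Services  map[string]baseLimit
	Protocols map[string]baseLimit
	Peers     map[string]baseLimit
}

func main() {
	// Same invocation as the test, plus the new filter option: only scopes
	// using at least 90% of one of their limits are reported.
	out, err := exec.Command("ipfs", "swarm", "stats", "all", "--enc=json", "--min-used-limit-perc=90").Output()
	if err != nil {
		panic(err)
	}
	var stats swarmStats
	if err := json.Unmarshal(out, &stats); err != nil {
		panic(err)
	}
	if stats.System != nil {
		fmt.Printf("system scope exceeded 90%% of at least one limit: %d inbound conns, %d bytes of memory in use\n",
			stats.System.ConnsInbound, stats.System.Memory)
	}
}
```

Because `compareLimits` drops scopes below the requested percentage, keys missing from the JSON (nil pointers or absent map entries) simply mean that scope was under the threshold.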