From 6c482afe9e4220d07f707a9e7f064597161e62bd Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Fri, 7 Oct 2022 18:00:48 +0200 Subject: [PATCH 01/14] Improve ResourceManager UX This PR adds several new features that make ResourceManager easier to use: - Resource manager logs emitted when limits are exceeded are now at ERROR level instead of WARN. - The resource-limit-exceeded error now shows what kind of limit was reached. - Once limits are no longer being exceeded, we print a message telling the user that limits are back to normal. - Added `swarm limit all` command to show all set limits in the same format as `swarm stats all`. - Added `min-used-limit-perc` option to `swarm stats all` to only show stats that are above a specific percentage. --- core/commands/swarm.go | 33 +++--- core/node/libp2p/rcmgr.go | 148 ++++++++++++++++++++++++- core/node/libp2p/rcmgr_logging.go | 53 ++++++--- core/node/libp2p/rcmgr_logging_test.go | 2 +- 4 files changed, 202 insertions(+), 34 deletions(-) diff --git a/core/commands/swarm.go b/core/commands/swarm.go index 6897e5eb602..1508efcb8a8 100644 --- a/core/commands/swarm.go +++ b/core/commands/swarm.go @@ -63,11 +63,12 @@ ipfs peers in the internet. } const ( - swarmVerboseOptionName = "verbose" - swarmStreamsOptionName = "streams" - swarmLatencyOptionName = "latency" - swarmDirectionOptionName = "direction" - swarmResetLimitsOptionName = "reset" + swarmVerboseOptionName = "verbose" + swarmStreamsOptionName = "streams" + swarmLatencyOptionName = "latency" + swarmDirectionOptionName = "direction" + swarmResetLimitsOptionName = "reset" + swarmUsedResourcesPercentageName = "min-used-limit-perc" ) type peeringResult struct { @@ -340,6 +341,9 @@ The output of this command is JSON. 
Arguments: []cmds.Argument{ cmds.StringArg("scope", true, false, "scope of the stat report"), }, + Options: []cmds.Option{ + cmds.IntOption(swarmUsedResourcesPercentageName, "Display only resources that are using above the specified percentage"), + }, Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error { node, err := cmdenv.GetNode(env) if err != nil { @@ -353,8 +357,10 @@ The output of this command is JSON. if len(req.Arguments) != 1 { return fmt.Errorf("must specify exactly one scope") } + + percentage, _ := req.Options[swarmUsedResourcesPercentageName].(int) scope := req.Arguments[0] - result, err := libp2p.NetStat(node.ResourceManager, scope) + result, err := libp2p.NetStat(node.ResourceManager, scope, percentage) if err != nil { return err } @@ -378,6 +384,7 @@ var swarmLimitCmd = &cmds.Command{ Tagline: "Get or set resource limits for a scope.", LongDescription: `Get or set resource limits for a scope. The scope can be one of the following: +- all -- all limits actually being applied. - system -- limits for the system aggregate resource usage. - transient -- limits for the transient resource usage. - svc: -- limits for the resource usage of a specific service. 
@@ -435,19 +442,19 @@ Changes made via command line are persisted in the Swarm.ResourceMgr.Limits fiel } } - var result rcmgr.BaseLimit + var result interface{} _, reset := req.Options[swarmResetLimitsOptionName] if reset { result, err = libp2p.NetResetLimit(node.ResourceManager, node.Repo, scope) - if err != nil { - return err - } + } else if scope == "all" { + result, err = libp2p.NetLimitAll(node.ResourceManager) } else { // get scope limit result, err = libp2p.NetLimit(node.ResourceManager, scope) - if err != nil { - return err - } + } + + if err != nil { + return err } b := new(bytes.Buffer) diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index c9a04338aea..d6a75003f38 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -129,7 +129,7 @@ type NetStatOut struct { Peers map[string]network.ScopeStat `json:",omitempty"` } -func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { +func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStatOut, error) { var err error var result NetStatOut switch { @@ -139,22 +139,42 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { return result, ErrNoResourceMgr } + limits, err := NetLimitAll(mgr) + if err != nil { + return result, err + } + stat := rapi.Stat() - result.System = &stat.System - result.Transient = &stat.Transient + result.System = compareScopes(&stat.System, limits.System, percentage) + result.Transient = compareScopes(&stat.Transient, limits.Transient, percentage) if len(stat.Services) > 0 { - result.Services = stat.Services + result.Services = make(map[string]network.ScopeStat, len(stat.Services)) + for srv, stat := range stat.Services { + ls := limits.Services[srv] + fstat := compareScopes(&stat, &ls, percentage) + if fstat != nil { + result.Services[srv] = *fstat + } + } } if len(stat.Protocols) > 0 { result.Protocols = make(map[string]network.ScopeStat, len(stat.Protocols)) for proto, stat := range 
stat.Protocols { - result.Protocols[string(proto)] = stat + ls := limits.Protocols[string(proto)] + fstat := compareScopes(&stat, &ls, percentage) + if fstat != nil { + result.Protocols[string(proto)] = *fstat + } } } if len(stat.Peers) > 0 { result.Peers = make(map[string]network.ScopeStat, len(stat.Peers)) for p, stat := range stat.Peers { - result.Peers[p.Pretty()] = stat + ls := limits.Peers[p.Pretty()] + fstat := compareScopes(&stat, &ls, percentage) + if fstat != nil { + result.Protocols[p.Pretty()] = *fstat + } } } @@ -218,6 +238,122 @@ func NetStat(mgr network.ResourceManager, scope string) (NetStatOut, error) { } } +var scopes = []string{ + config.ResourceMgrSystemScope, + config.ResourceMgrTransientScope, + config.ResourceMgrServiceScopePrefix, + config.ResourceMgrProtocolScopePrefix, + config.ResourceMgrPeerScopePrefix, +} + +func limitToScope(l rcmgr.BaseLimit) *network.ScopeStat { + return &network.ScopeStat{ + NumStreamsInbound: l.StreamsInbound, + NumStreamsOutbound: l.StreamsOutbound, + NumConnsInbound: l.ConnsInbound, + NumConnsOutbound: l.ConnsOutbound, + NumFD: l.FD, + Memory: l.Memory, + } +} + +// compareScopes copares stat and limit. +// If any of the stats value are equals or above the specified percentage, +// stat object is returned. 
+func compareScopes(stat, limit *network.ScopeStat, percentage int) *network.ScopeStat { + if stat == nil || limit == nil { + return nil + } + if abovePercentage(int(stat.Memory), int(limit.Memory), percentage) { + return stat + } + if abovePercentage(stat.NumConnsInbound, limit.NumConnsInbound, percentage) { + return stat + } + if abovePercentage(stat.NumConnsOutbound, limit.NumConnsOutbound, percentage) { + return stat + } + if abovePercentage(stat.NumFD, limit.NumFD, percentage) { + return stat + } + if abovePercentage(stat.NumStreamsInbound, limit.NumStreamsInbound, percentage) { + return stat + } + if abovePercentage(stat.NumStreamsOutbound, limit.NumStreamsOutbound, percentage) { + return stat + } + + return nil +} + +func abovePercentage(v1, v2, percentage int) bool { + if percentage == 0 { + return true + } + + if v2 == 0 { + return false + } + + return int((v1/v2))*100 >= percentage +} + +func NetLimitAll(mgr network.ResourceManager) (*NetStatOut, error) { + var result = &NetStatOut{} + lister, ok := mgr.(rcmgr.ResourceManagerState) + if !ok { // NullResourceManager + return result, ErrNoResourceMgr + } + + for _, s := range scopes { + switch s { + case config.ResourceMgrSystemScope: + s, err := NetLimit(mgr, config.ResourceMgrSystemScope) + if err != nil { + return nil, err + } + result.System = limitToScope(s) + case config.ResourceMgrTransientScope: + s, err := NetLimit(mgr, config.ResourceMgrSystemScope) + if err != nil { + return nil, err + } + result.Transient = limitToScope(s) + case config.ResourceMgrServiceScopePrefix: + result.Services = make(map[string]network.ScopeStat) + for _, serv := range lister.ListServices() { + s, err := NetLimit(mgr, config.ResourceMgrServiceScopePrefix+serv) + if err != nil { + return nil, err + } + result.Services[serv] = *limitToScope(s) + } + case config.ResourceMgrProtocolScopePrefix: + result.Protocols = make(map[string]network.ScopeStat) + for _, prot := range lister.ListProtocols() { + ps := string(prot) + s, 
err := NetLimit(mgr, config.ResourceMgrProtocolScopePrefix+ps) + if err != nil { + return nil, err + } + result.Protocols[ps] = *limitToScope(s) + } + case config.ResourceMgrPeerScopePrefix: + result.Peers = make(map[string]network.ScopeStat) + for _, peer := range lister.ListPeers() { + ps := peer.Pretty() + s, err := NetLimit(mgr, config.ResourceMgrPeerScopePrefix+ps) + if err != nil { + return nil, err + } + result.Peers[ps] = *limitToScope(s) + } + } + } + + return result, nil +} + func NetLimit(mgr network.ResourceManager, scope string) (rcmgr.BaseLimit, error) { var result rcmgr.BaseLimit getLimit := func(s network.ResourceScope) error { diff --git a/core/node/libp2p/rcmgr_logging.go b/core/node/libp2p/rcmgr_logging.go index 4b50cdc2ef9..5e5a9f6a83f 100644 --- a/core/node/libp2p/rcmgr_logging.go +++ b/core/node/libp2p/rcmgr_logging.go @@ -15,6 +15,17 @@ import ( "go.uber.org/zap" ) +type action string + +var ( + reserveMemory action = "ReserveMemory" + openConnection action = "OpenConnection" + openStream action = "OpenStream" + setPeer action = "SetPeer" + setProtocol action = "SetProtocol" + setService action = "SetService" +) + type loggingResourceManager struct { clock clock.Clock logger *zap.SugaredLogger @@ -22,13 +33,14 @@ type loggingResourceManager struct { logInterval time.Duration mut sync.Mutex - limitExceededErrs uint64 + limitExceededErrs map[action]uint64 + previousErrors bool } type loggingScope struct { logger *zap.SugaredLogger delegate network.ResourceScope - countErrs func(error) + countErrs func(error, action) } var _ network.ResourceManager = (*loggingResourceManager)(nil) @@ -47,11 +59,21 @@ func (n *loggingResourceManager) start(ctx context.Context) { case <-ticker.C: n.mut.Lock() errs := n.limitExceededErrs - n.limitExceededErrs = 0 - n.mut.Unlock() - if errs != 0 { - n.logger.Warnf("Resource limits were exceeded %d times, consider inspecting logs and raising the resource manager limits.", errs) + n.limitExceededErrs = 
make(map[action]uint64) + + for act, count := range errs { + if count != 0 { + n.previousErrors = true + n.logger.Errorf("Resource limits were exceeded %d times on %q, consider inspecting logs and raising the resource manager limits.", count, act) + } } + + if len(errs) == 0 && n.previousErrors { + n.previousErrors = false + n.logger.Errorf("Resource limits were back to normal.") + } + + n.mut.Unlock() case <-ctx.Done(): return } @@ -59,10 +81,13 @@ func (n *loggingResourceManager) start(ctx context.Context) { }() } -func (n *loggingResourceManager) countErrs(err error) { +func (n *loggingResourceManager) countErrs(err error, act action) { if errors.Is(err, network.ErrResourceLimitExceeded) { n.mut.Lock() - n.limitExceededErrs++ + if n.limitExceededErrs == nil { + n.limitExceededErrs = make(map[action]uint64) + } + n.limitExceededErrs[act]++ n.mut.Unlock() } } @@ -92,12 +117,12 @@ func (n *loggingResourceManager) ViewPeer(p peer.ID, f func(network.PeerScope) e } func (n *loggingResourceManager) OpenConnection(dir network.Direction, usefd bool, remote ma.Multiaddr) (network.ConnManagementScope, error) { connMgmtScope, err := n.delegate.OpenConnection(dir, usefd, remote) - n.countErrs(err) + n.countErrs(err, openConnection) return connMgmtScope, err } func (n *loggingResourceManager) OpenStream(p peer.ID, dir network.Direction) (network.StreamManagementScope, error) { connMgmtScope, err := n.delegate.OpenStream(p, dir) - n.countErrs(err) + n.countErrs(err, openStream) return connMgmtScope, err } func (n *loggingResourceManager) Close() error { @@ -140,7 +165,7 @@ func (n *loggingResourceManager) Stat() rcmgr.ResourceManagerStat { func (s *loggingScope) ReserveMemory(size int, prio uint8) error { err := s.delegate.ReserveMemory(size, prio) - s.countErrs(err) + s.countErrs(err, reserveMemory) return err } func (s *loggingScope) ReleaseMemory(size int) { @@ -169,7 +194,7 @@ func (s *loggingScope) PeerScope() network.PeerScope { } func (s *loggingScope) SetPeer(p 
peer.ID) error { err := s.delegate.(network.ConnManagementScope).SetPeer(p) - s.countErrs(err) + s.countErrs(err, setPeer) return err } func (s *loggingScope) ProtocolScope() network.ProtocolScope { @@ -177,7 +202,7 @@ func (s *loggingScope) ProtocolScope() network.ProtocolScope { } func (s *loggingScope) SetProtocol(proto protocol.ID) error { err := s.delegate.(network.StreamManagementScope).SetProtocol(proto) - s.countErrs(err) + s.countErrs(err, setProtocol) return err } func (s *loggingScope) ServiceScope() network.ServiceScope { @@ -185,7 +210,7 @@ func (s *loggingScope) ServiceScope() network.ServiceScope { } func (s *loggingScope) SetService(srv string) error { err := s.delegate.(network.StreamManagementScope).SetService(srv) - s.countErrs(err) + s.countErrs(err, setService) return err } func (s *loggingScope) Limit() rcmgr.Limit { diff --git a/core/node/libp2p/rcmgr_logging_test.go b/core/node/libp2p/rcmgr_logging_test.go index 1dfad73afcc..e51e06e6c2f 100644 --- a/core/node/libp2p/rcmgr_logging_test.go +++ b/core/node/libp2p/rcmgr_logging_test.go @@ -55,7 +55,7 @@ func TestLoggingResourceManager(t *testing.T) { if oLogs.Len() == 0 { continue } - require.Equal(t, "Resource limits were exceeded 2 times, consider inspecting logs and raising the resource manager limits.", oLogs.All()[0].Message) + require.Equal(t, "Resource limits were exceeded 2 times on \"OpenConnection\", consider inspecting logs and raising the resource manager limits.", oLogs.All()[0].Message) return } } From 86b5dd8a76543b30a32e54d10f99f6b8bbe11408 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Thu, 13 Oct 2022 11:37:58 +0200 Subject: [PATCH 02/14] Improve log output and fix stat.Peers output bug --- core/node/libp2p/rcmgr.go | 2 +- core/node/libp2p/rcmgr_logging.go | 49 +++++++++++--------------- core/node/libp2p/rcmgr_logging_test.go | 2 +- 3 files changed, 23 insertions(+), 30 deletions(-) diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index 
d6a75003f38..3e4c0382110 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -173,7 +173,7 @@ func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStat ls := limits.Peers[p.Pretty()] fstat := compareScopes(&stat, &ls, percentage) if fstat != nil { - result.Protocols[p.Pretty()] = *fstat + result.Peers[p.Pretty()] = *fstat } } } diff --git a/core/node/libp2p/rcmgr_logging.go b/core/node/libp2p/rcmgr_logging.go index 5e5a9f6a83f..984736c8759 100644 --- a/core/node/libp2p/rcmgr_logging.go +++ b/core/node/libp2p/rcmgr_logging.go @@ -15,17 +15,6 @@ import ( "go.uber.org/zap" ) -type action string - -var ( - reserveMemory action = "ReserveMemory" - openConnection action = "OpenConnection" - openStream action = "OpenStream" - setPeer action = "SetPeer" - setProtocol action = "SetProtocol" - setService action = "SetService" -) - type loggingResourceManager struct { clock clock.Clock logger *zap.SugaredLogger @@ -33,14 +22,14 @@ type loggingResourceManager struct { logInterval time.Duration mut sync.Mutex - limitExceededErrs map[action]uint64 + limitExceededErrs map[string]int previousErrors bool } type loggingScope struct { logger *zap.SugaredLogger delegate network.ResourceScope - countErrs func(error, action) + countErrs func(error) } var _ network.ResourceManager = (*loggingResourceManager)(nil) @@ -59,13 +48,11 @@ func (n *loggingResourceManager) start(ctx context.Context) { case <-ticker.C: n.mut.Lock() errs := n.limitExceededErrs - n.limitExceededErrs = make(map[action]uint64) + n.limitExceededErrs = make(map[string]int) - for act, count := range errs { - if count != 0 { - n.previousErrors = true - n.logger.Errorf("Resource limits were exceeded %d times on %q, consider inspecting logs and raising the resource manager limits.", count, act) - } + for e, count := range errs { + n.previousErrors = true + n.logger.Errorf("Resource limits were exceeded %d times with error %q. 
Consider inspecting logs and raising the resource manager limits.", count, e) } if len(errs) == 0 && n.previousErrors { @@ -81,13 +68,19 @@ func (n *loggingResourceManager) start(ctx context.Context) { }() } -func (n *loggingResourceManager) countErrs(err error, act action) { +func (n *loggingResourceManager) countErrs(err error) { if errors.Is(err, network.ErrResourceLimitExceeded) { n.mut.Lock() if n.limitExceededErrs == nil { - n.limitExceededErrs = make(map[action]uint64) + n.limitExceededErrs = make(map[string]int) } - n.limitExceededErrs[act]++ + + // we need to unwrap the error to get the limit scope and the kind of reached limit + eout := errors.Unwrap(err) + if eout != nil { + n.limitExceededErrs[eout.Error()]++ + } + n.mut.Unlock() } } @@ -117,12 +110,12 @@ func (n *loggingResourceManager) ViewPeer(p peer.ID, f func(network.PeerScope) e } func (n *loggingResourceManager) OpenConnection(dir network.Direction, usefd bool, remote ma.Multiaddr) (network.ConnManagementScope, error) { connMgmtScope, err := n.delegate.OpenConnection(dir, usefd, remote) - n.countErrs(err, openConnection) + n.countErrs(err) return connMgmtScope, err } func (n *loggingResourceManager) OpenStream(p peer.ID, dir network.Direction) (network.StreamManagementScope, error) { connMgmtScope, err := n.delegate.OpenStream(p, dir) - n.countErrs(err, openStream) + n.countErrs(err) return connMgmtScope, err } func (n *loggingResourceManager) Close() error { @@ -165,7 +158,7 @@ func (n *loggingResourceManager) Stat() rcmgr.ResourceManagerStat { func (s *loggingScope) ReserveMemory(size int, prio uint8) error { err := s.delegate.ReserveMemory(size, prio) - s.countErrs(err, reserveMemory) + s.countErrs(err) return err } func (s *loggingScope) ReleaseMemory(size int) { @@ -194,7 +187,7 @@ func (s *loggingScope) PeerScope() network.PeerScope { } func (s *loggingScope) SetPeer(p peer.ID) error { err := s.delegate.(network.ConnManagementScope).SetPeer(p) - s.countErrs(err, setPeer) + s.countErrs(err) 
return err } func (s *loggingScope) ProtocolScope() network.ProtocolScope { @@ -202,7 +195,7 @@ func (s *loggingScope) ProtocolScope() network.ProtocolScope { } func (s *loggingScope) SetProtocol(proto protocol.ID) error { err := s.delegate.(network.StreamManagementScope).SetProtocol(proto) - s.countErrs(err, setProtocol) + s.countErrs(err) return err } func (s *loggingScope) ServiceScope() network.ServiceScope { @@ -210,7 +203,7 @@ func (s *loggingScope) ServiceScope() network.ServiceScope { } func (s *loggingScope) SetService(srv string) error { err := s.delegate.(network.StreamManagementScope).SetService(srv) - s.countErrs(err, setService) + s.countErrs(err) return err } func (s *loggingScope) Limit() rcmgr.Limit { diff --git a/core/node/libp2p/rcmgr_logging_test.go b/core/node/libp2p/rcmgr_logging_test.go index e51e06e6c2f..f37a8515858 100644 --- a/core/node/libp2p/rcmgr_logging_test.go +++ b/core/node/libp2p/rcmgr_logging_test.go @@ -55,7 +55,7 @@ func TestLoggingResourceManager(t *testing.T) { if oLogs.Len() == 0 { continue } - require.Equal(t, "Resource limits were exceeded 2 times on \"OpenConnection\", consider inspecting logs and raising the resource manager limits.", oLogs.All()[0].Message) + require.Equal(t, "Resource limits were exceeded 2 times with error \"system: cannot reserve inbound connection: resource limit exceeded\". 
Consider inspecting logs and raising the resource manager limits.", oLogs.All()[0].Message) return } } From 6ba5a153e21221d7ec7eb50b9adbb712f6d3d713 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 19 Oct 2022 12:37:44 +0200 Subject: [PATCH 03/14] Requested changes --- core/node/libp2p/rcmgr_logging.go | 4 ++-- core/node/libp2p/rcmgr_logging_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/node/libp2p/rcmgr_logging.go b/core/node/libp2p/rcmgr_logging.go index 984736c8759..57be9205b8e 100644 --- a/core/node/libp2p/rcmgr_logging.go +++ b/core/node/libp2p/rcmgr_logging.go @@ -52,12 +52,12 @@ func (n *loggingResourceManager) start(ctx context.Context) { for e, count := range errs { n.previousErrors = true - n.logger.Errorf("Resource limits were exceeded %d times with error %q. Consider inspecting logs and raising the resource manager limits.", count, e) + n.logger.Errorf("Resource limits were exceeded %d times with error %q. Consider inspecting logs and raising the resource manager limits. Documentation: https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmresourcemgr", count, e) } if len(errs) == 0 && n.previousErrors { n.previousErrors = false - n.logger.Errorf("Resource limits were back to normal.") + n.logger.Errorf("Resrouce limits are no longer being exceeded.") } n.mut.Unlock() diff --git a/core/node/libp2p/rcmgr_logging_test.go b/core/node/libp2p/rcmgr_logging_test.go index f37a8515858..5be4276e5ad 100644 --- a/core/node/libp2p/rcmgr_logging_test.go +++ b/core/node/libp2p/rcmgr_logging_test.go @@ -55,7 +55,7 @@ func TestLoggingResourceManager(t *testing.T) { if oLogs.Len() == 0 { continue } - require.Equal(t, "Resource limits were exceeded 2 times with error \"system: cannot reserve inbound connection: resource limit exceeded\". 
Consider inspecting logs and raising the resource manager limits.", oLogs.All()[0].Message) + require.Equal(t, "Resource limits were exceeded 2 times with error \"system: cannot reserve inbound connection: resource limit exceeded\". Consider inspecting logs and raising the resource manager limits. Documentation: https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmresourcemgr", oLogs.All()[0].Message) return } } From e75b5709694d9f6f6c8f09ce502f02feaa022a63 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 10:20:03 +0100 Subject: [PATCH 04/14] Simplify ResourceManager defaults. Signed-off-by: Antonio Navarro Perez --- .circleci/main.yml | 1 - core/node/libp2p/rcmgr_defaults.go | 868 ++--------------------------- test/sharness/t0139-swarm-rcmgr.sh | 2 - 3 files changed, 60 insertions(+), 811 deletions(-) diff --git a/.circleci/main.yml b/.circleci/main.yml index 67f3e711b19..86684a5d0cc 100644 --- a/.circleci/main.yml +++ b/.circleci/main.yml @@ -32,7 +32,6 @@ default_environment: &default_environment CIRCLE_TEST_REPORTS: /tmp/circleci-test-results CIRCLE_ARTIFACTS: /tmp/circleci-artifacts GIT_PAGER: cat - IPFS_CHECK_RCMGR_DEFAULTS: 1 executors: golang: diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index 8d77e56aa06..a06840e3a50 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -1,832 +1,84 @@ package libp2p import ( - "encoding/json" - "fmt" - "math/bits" - "os" - "strings" + "math" "github.com/ipfs/kubo/config" "github.com/libp2p/go-libp2p" rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" - - "github.com/wI2L/jsondiff" ) +var infiniteBaseLimit = rcmgr.BaseLimit{ + Streams: math.MaxInt, + StreamsInbound: math.MaxInt, + StreamsOutbound: math.MaxInt, + Conns: math.MaxInt, + ConnsInbound: math.MaxInt, + ConnsOutbound: math.MaxInt, + FD: math.MaxInt, + Memory: math.MaxInt, +} + // This file defines implicit limit defaults used when 
Swarm.ResourceMgr.Enabled // adjustedDefaultLimits allows for tweaking defaults based on external factors, // such as values in Swarm.ConnMgr.HiWater config. func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { - // Run checks to avoid introducing regressions - if os.Getenv("IPFS_CHECK_RCMGR_DEFAULTS") != "" { - checkImplicitDefaults() + defaultLimits := rcmgr.ScalingLimitConfig{ + SystemBaseLimit: rcmgr.BaseLimit{ + Memory: rcmgr.DefaultLimits.SystemBaseLimit.Memory, + FD: rcmgr.DefaultLimits.SystemBaseLimit.FD, + + Conns: math.MaxInt, // just limit on the inbound + ConnsInbound: rcmgr.DefaultLimits.SystemBaseLimit.ConnsInbound, // same as libp2p default + ConnsOutbound: math.MaxInt, + + // Don't limit streams. Rely on connection and memory limits. + Streams: math.MaxInt, + StreamsInbound: math.MaxInt, + StreamsOutbound: math.MaxInt, + }, + + // Just go with what libp2p does + TransientBaseLimit: rcmgr.DefaultLimits.TransientBaseLimit, + TransientLimitIncrease: rcmgr.DefaultLimits.TransientLimitIncrease, + + // Lets get out of the way of the allow list functionality. + // If someone specified "Swarm.ResourceMgr.Allowlist" we should let it go through. + AllowlistedSystemBaseLimit: infiniteBaseLimit, + AllowlistedTransientBaseLimit: infiniteBaseLimit, + + // Keep it simple by not having Service, ServicePeer, Protocol, ProtocolPeer, Peer, Conn, or Stream limits. + ServiceBaseLimit: infiniteBaseLimit, + ServicePeerBaseLimit: infiniteBaseLimit, + ProtocolBaseLimit: infiniteBaseLimit, + ProtocolPeerBaseLimit: infiniteBaseLimit, + ConnBaseLimit: infiniteBaseLimit, + StreamBaseLimit: infiniteBaseLimit, + + // Limit connections per peer. Really important to mitigate flooding attacks from a peer. 
+ PeerBaseLimit: rcmgr.BaseLimit{ + Streams: math.MaxInt, + StreamsOutbound: math.MaxInt, + StreamsInbound: 1, + Conns: math.MaxInt, + ConnsInbound: 1, + ConnsOutbound: math.MaxInt, + FD: rcmgr.DefaultLimits.PeerBaseLimit.FD, + Memory: rcmgr.DefaultLimits.PeerBaseLimit.Memory, + }, } - defaultLimits := rcmgr.DefaultLimits - libp2p.SetDefaultServiceLimits(&defaultLimits) - // Adjust limits - // (based on https://github.com/filecoin-project/lotus/pull/8318/files) - // - if Swarm.ConnMgr.HighWater is too high, adjust Conn/FD/Stream limits + libp2p.SetDefaultServiceLimits(&defaultLimits) - // Outbound conns and FDs are set very high to allow for the accelerated DHT client to (re)load its routing table. - // Currently it doesn't gracefully handle RM throttling--once it does we can lower these. - // High outbound conn limits are considered less of a DoS risk than high inbound conn limits. - // Also note that, due to the behavior of the accelerated DHT client, we don't need many streams, just conns. - if minOutbound := 65536; defaultLimits.SystemBaseLimit.ConnsOutbound < minOutbound { - defaultLimits.SystemBaseLimit.ConnsOutbound = minOutbound - } - if minFD := 4096; defaultLimits.SystemBaseLimit.FD < minFD { - defaultLimits.SystemBaseLimit.FD = minFD - } defaultLimitConfig := defaultLimits.AutoScale() - // Do we need to adjust due to Swarm.ConnMgr.HighWater? 
+ // If a high water mark is set: if cfg.ConnMgr.Type == "basic" { - maxconns := cfg.ConnMgr.HighWater - if 2*maxconns > defaultLimitConfig.System.ConnsInbound { - // adjust conns to 2x to allow for two conns per peer (TCP+QUIC) - defaultLimitConfig.System.ConnsInbound = logScale(2 * maxconns) - defaultLimitConfig.System.ConnsOutbound = logScale(2 * maxconns) - defaultLimitConfig.System.Conns = logScale(4 * maxconns) - - defaultLimitConfig.System.StreamsInbound = logScale(16 * maxconns) - defaultLimitConfig.System.StreamsOutbound = logScale(64 * maxconns) - defaultLimitConfig.System.Streams = logScale(64 * maxconns) - - if 2*maxconns > defaultLimitConfig.System.FD { - defaultLimitConfig.System.FD = logScale(2 * maxconns) - } - - defaultLimitConfig.ServiceDefault.StreamsInbound = logScale(8 * maxconns) - defaultLimitConfig.ServiceDefault.StreamsOutbound = logScale(32 * maxconns) - defaultLimitConfig.ServiceDefault.Streams = logScale(32 * maxconns) - - defaultLimitConfig.ProtocolDefault.StreamsInbound = logScale(8 * maxconns) - defaultLimitConfig.ProtocolDefault.StreamsOutbound = logScale(32 * maxconns) - defaultLimitConfig.ProtocolDefault.Streams = logScale(32 * maxconns) - - log.Info("adjusted default resource manager limits") - } - + // set the connection limit higher than high water mark so that the ConnMgr has "space and time" to close "least useful" connections. + defaultLimitConfig.System.Conns = 2 * cfg.ConnMgr.HighWater + log.Info("adjusted default resource manager System.Conns limits to match ConnMgr.HighWater value of %s", cfg.ConnMgr.HighWater) } return defaultLimitConfig } - -func logScale(val int) int { - bitlen := bits.Len(uint(val)) - return 1 << bitlen -} - -// checkImplicitDefaults compares libp2p defaults agains expected ones -// and panics when they don't match. This ensures we are not surprised -// by silent default limit changes when we update go-libp2p dependencies. 
-func checkImplicitDefaults() { - ok := true - - // Check 1: did go-libp2p-resource-manager's DefaultLimits change? - defaults, err := json.Marshal(rcmgr.DefaultLimits) - if err != nil { - log.Fatal(err) - } - changes, err := jsonDiff([]byte(expectedDefaultLimits), defaults) - if err != nil { - log.Fatal(err) - } - if len(changes) > 0 { - ok = false - log.Errorf("===> OOF! go-libp2p-resource-manager changed DefaultLimits\n"+ - "=> changes ('test' represents the old value):\n%s\n"+ - "=> go-libp2p-resource-manager DefaultLimits update needs a review:\n"+ - "Please inspect if changes impact go-ipfs users, and update expectedDefaultLimits in rcmgr_defaults.go to remove this message", - strings.Join(changes, "\n"), - ) - } - - // Check 2: did go-libp2p's SetDefaultServiceLimits change? - // We compare the baseline (min specs), and check if we went down in any limits. - l := rcmgr.DefaultLimits - libp2p.SetDefaultServiceLimits(&l) - limits := l.AutoScale() - testLimiter := rcmgr.NewFixedLimiter(limits) - - serviceDefaults, err := json.Marshal(testLimiter) - if err != nil { - log.Fatal(err) - } - changes, err = jsonDiff([]byte(expectedDefaultServiceLimits), serviceDefaults) - if err != nil { - log.Fatal(err) - } - if len(changes) > 0 { - oldState := map[string]int{} - type Op struct { - Op string - Path string - Value int - } - for _, changeStr := range changes { - change := Op{} - err := json.Unmarshal([]byte(changeStr), &change) - if err != nil { - continue - } - if change.Op == "test" { - oldState[change.Path] = change.Value - } - } - - for _, changeStr := range changes { - change := Op{} - err := json.Unmarshal([]byte(changeStr), &change) - if err != nil { - continue - } - if change.Op == "replace" { - oldVal, okFound := oldState[change.Path] - if okFound && oldVal > change.Value { - ok = false - fmt.Printf("reduced value for %s. Old: %v; new: %v\n", change.Path, oldVal, change.Value) - } - } - } - - if !ok { - log.Errorf("===> OOF! 
go-libp2p changed DefaultServiceLimits\n" + - "=> See the aboce reduced values for info.\n" + - "=> go-libp2p SetDefaultServiceLimits update needs a review:\n" + - "Please inspect if changes impact go-ipfs users, and update expectedDefaultServiceLimits in rcmgr_defaults.go to remove this message", - ) - } - } - if !ok { - log.Fatal("daemon will refuse to run with the resource manager until this is resolved") - } -} - -// jsonDiff compares two strings and returns diff in JSON Patch format -func jsonDiff(old []byte, updated []byte) ([]string, error) { - // generate 'invertible' patch which includes old values as "test" op - patch, err := jsondiff.CompareJSONOpts(old, updated, jsondiff.Invertible()) - changes := make([]string, len(patch)) - if err != nil { - return changes, err - } - for i, op := range patch { - changes[i] = fmt.Sprintf(" %s", op) - } - return changes, nil -} - -// https://github.com/libp2p/go-libp2p/blob/v0.22.0/p2p/host/resource-manager/limit_defaults.go#L343 -const expectedDefaultLimits = `{ - "SystemBaseLimit": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "FD": 256, - "Memory": 134217728 - }, - "SystemLimitIncrease": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "Memory": 1073741824, - "FDFraction": 1 - }, - "TransientBaseLimit": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 64, - "ConnsInbound": 32, - "ConnsOutbound": 64, - "FD": 64, - "Memory": 33554432 - }, - "TransientLimitIncrease": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 32, - "ConnsInbound": 16, - "ConnsOutbound": 32, - "Memory": 134217728, - "FDFraction": 0.25 - }, - "AllowlistedSystemBaseLimit": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - 
"FD": 256, - "Memory": 134217728 - }, - "AllowlistedSystemLimitIncrease": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "Memory": 1073741824, - "FDFraction": 1 - }, - "AllowlistedTransientBaseLimit": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 64, - "ConnsInbound": 32, - "ConnsOutbound": 64, - "FD": 64, - "Memory": 33554432 - }, - "AllowlistedTransientLimitIncrease": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 32, - "ConnsInbound": 16, - "ConnsOutbound": 32, - "Memory": 134217728, - "FDFraction": 0.25 - }, - "ServiceBaseLimit": { - "Streams": 4096, - "StreamsInbound": 1024, - "StreamsOutbound": 4096, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 67108864 - }, - "ServiceLimitIncrease": { - "Streams": 2048, - "StreamsInbound": 512, - "StreamsOutbound": 2048, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 134217728, - "FDFraction": 0 - }, - "ServiceLimits": null, - "ServicePeerBaseLimit": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "ServicePeerLimitIncrease": { - "Streams": 8, - "StreamsInbound": 4, - "StreamsOutbound": 8, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 4194304, - "FDFraction": 0 - }, - "ServicePeerLimits": null, - "ProtocolBaseLimit": { - "Streams": 2048, - "StreamsInbound": 512, - "StreamsOutbound": 2048, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 67108864 - }, - "ProtocolLimitIncrease": { - "Streams": 512, - "StreamsInbound": 256, - "StreamsOutbound": 512, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 171966464, - "FDFraction": 0 - }, - "ProtocolLimits": null, - "ProtocolPeerBaseLimit": { - "Streams": 256, - "StreamsInbound": 
64, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "ProtocolPeerLimitIncrease": { - "Streams": 16, - "StreamsInbound": 4, - "StreamsOutbound": 8, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 4, - "FDFraction": 0 - }, - "ProtocolPeerLimits": null, - "PeerBaseLimit": { - "Streams": 512, - "StreamsInbound": 256, - "StreamsOutbound": 512, - "Conns": 8, - "ConnsInbound": 4, - "ConnsOutbound": 8, - "FD": 4, - "Memory": 67108864 - }, - "PeerLimitIncrease": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 134217728, - "FDFraction": 0.015625 - }, - "PeerLimits": null, - "ConnBaseLimit": { - "Streams": 0, - "StreamsInbound": 0, - "StreamsOutbound": 0, - "Conns": 1, - "ConnsInbound": 1, - "ConnsOutbound": 1, - "FD": 1, - "Memory": 33554432 - }, - "ConnLimitIncrease": { - "Streams": 0, - "StreamsInbound": 0, - "StreamsOutbound": 0, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 0, - "FDFraction": 0 - }, - "StreamBaseLimit": { - "Streams": 1, - "StreamsInbound": 1, - "StreamsOutbound": 1, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "StreamLimitIncrease": { - "Streams": 0, - "StreamsInbound": 0, - "StreamsOutbound": 0, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "Memory": 0, - "FDFraction": 0 - } -}` - -// Generated from the default limits and scaling to 0 (base limit). 
-const expectedDefaultServiceLimits = `{ - "System": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "FD": 256, - "Memory": 134217728 - }, - "Transient": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 64, - "ConnsInbound": 32, - "ConnsOutbound": 64, - "FD": 64, - "Memory": 33554432 - }, - "AllowlistedSystem": { - "Streams": 2048, - "StreamsInbound": 1024, - "StreamsOutbound": 2048, - "Conns": 128, - "ConnsInbound": 64, - "ConnsOutbound": 128, - "FD": 256, - "Memory": 134217728 - }, - "AllowlistedTransient": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 64, - "ConnsInbound": 32, - "ConnsOutbound": 64, - "FD": 64, - "Memory": 33554432 - }, - "ServiceDefault": { - "Streams": 4096, - "StreamsInbound": 1024, - "StreamsOutbound": 4096, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 67108864 - }, - "Service": { - "libp2p.autonat": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "libp2p.holepunch": { - "Streams": 64, - "StreamsInbound": 32, - "StreamsOutbound": 32, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "libp2p.identify": { - "Streams": 128, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "libp2p.ping": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "libp2p.relay/v1": { - "Streams": 256, - "StreamsInbound": 256, - "StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "libp2p.relay/v2": { - "Streams": 256, - "StreamsInbound": 256, - 
"StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - } - }, - "ServicePeerDefault": { - "Streams": 256, - "StreamsInbound": 128, - "StreamsOutbound": 256, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "ServicePeer": { - "libp2p.autonat": { - "Streams": 2, - "StreamsInbound": 2, - "StreamsOutbound": 2, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "libp2p.holepunch": { - "Streams": 2, - "StreamsInbound": 2, - "StreamsOutbound": 2, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "libp2p.identify": { - "Streams": 32, - "StreamsInbound": 16, - "StreamsOutbound": 16, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "libp2p.ping": { - "Streams": 4, - "StreamsInbound": 2, - "StreamsOutbound": 3, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - }, - "libp2p.relay/v1": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "libp2p.relay/v2": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - } - }, - "ProtocolDefault": { - "Streams": 2048, - "StreamsInbound": 512, - "StreamsOutbound": 2048, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 67108864 - }, - "Protocol": { - "/ipfs/id/1.0.0": { - "Streams": 128, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/ipfs/id/push/1.0.0": { - "Streams": 128, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - 
"/ipfs/ping/1.0.0": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/libp2p/autonat/1.0.0": { - "Streams": 64, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/libp2p/circuit/relay/0.1.0": { - "Streams": 640, - "StreamsInbound": 640, - "StreamsOutbound": 640, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "/libp2p/circuit/relay/0.2.0/hop": { - "Streams": 640, - "StreamsInbound": 640, - "StreamsOutbound": 640, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "/libp2p/circuit/relay/0.2.0/stop": { - "Streams": 640, - "StreamsInbound": 640, - "StreamsOutbound": 640, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "/libp2p/dcutr": { - "Streams": 64, - "StreamsInbound": 32, - "StreamsOutbound": 32, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - }, - "/p2p/id/delta/1.0.0": { - "Streams": 128, - "StreamsInbound": 64, - "StreamsOutbound": 64, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 4194304 - } - }, - "ProtocolPeerDefault": { - "Streams": 256, - "StreamsInbound": 64, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - }, - "ProtocolPeer": { - "/ipfs/id/1.0.0": { - "Streams": 32, - "StreamsInbound": 16, - "StreamsOutbound": 16, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - }, - "/ipfs/id/push/1.0.0": { - "Streams": 32, - "StreamsInbound": 16, - "StreamsOutbound": 16, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - }, - "/ipfs/ping/1.0.0": { - "Streams": 4, - "StreamsInbound": 2, - 
"StreamsOutbound": 3, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - }, - "/libp2p/autonat/1.0.0": { - "Streams": 2, - "StreamsInbound": 2, - "StreamsOutbound": 2, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "/libp2p/circuit/relay/0.1.0": { - "Streams": 128, - "StreamsInbound": 128, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 33554432 - }, - "/libp2p/circuit/relay/0.2.0/hop": { - "Streams": 128, - "StreamsInbound": 128, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 33554432 - }, - "/libp2p/circuit/relay/0.2.0/stop": { - "Streams": 128, - "StreamsInbound": 128, - "StreamsOutbound": 128, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 33554432 - }, - "/libp2p/dcutr": { - "Streams": 2, - "StreamsInbound": 2, - "StreamsOutbound": 2, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 1048576 - }, - "/p2p/id/delta/1.0.0": { - "Streams": 32, - "StreamsInbound": 16, - "StreamsOutbound": 16, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 8590458880 - } - }, - "PeerDefault": { - "Streams": 512, - "StreamsInbound": 256, - "StreamsOutbound": 512, - "Conns": 8, - "ConnsInbound": 4, - "ConnsOutbound": 8, - "FD": 4, - "Memory": 67108864 - }, - "Conn": { - "Streams": 0, - "StreamsInbound": 0, - "StreamsOutbound": 0, - "Conns": 1, - "ConnsInbound": 1, - "ConnsOutbound": 1, - "FD": 1, - "Memory": 1048576 - }, - "Stream": { - "Streams": 1, - "StreamsInbound": 1, - "StreamsOutbound": 1, - "Conns": 0, - "ConnsInbound": 0, - "ConnsOutbound": 0, - "FD": 0, - "Memory": 16777216 - } -}` diff --git a/test/sharness/t0139-swarm-rcmgr.sh b/test/sharness/t0139-swarm-rcmgr.sh index 15b9a0089e4..f4915791a63 100755 --- a/test/sharness/t0139-swarm-rcmgr.sh +++ 
b/test/sharness/t0139-swarm-rcmgr.sh @@ -2,8 +2,6 @@ # test_description="Test ipfs swarm ResourceMgr config and commands" -export IPFS_CHECK_RCMGR_DEFAULTS=1 - . lib/test-lib.sh test_init_ipfs From 46f6ab3e2bee4a04fc37cfbb140ab4ecc67ca3a0 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 10:43:07 +0100 Subject: [PATCH 05/14] Improve error messages and tweak default values. Signed-off-by: Antonio Navarro Perez --- core/node/libp2p/rcmgr_defaults.go | 8 ++++---- core/node/libp2p/rcmgr_logging.go | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index a06840e3a50..b560c172c36 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -33,7 +33,7 @@ func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { ConnsInbound: rcmgr.DefaultLimits.SystemBaseLimit.ConnsInbound, // same as libp2p default ConnsOutbound: math.MaxInt, - // Don't limit streams. Rely on connection and memory limits. + // Don't limit streams. Rely on peer and transient limits. Streams: math.MaxInt, StreamsInbound: math.MaxInt, StreamsOutbound: math.MaxInt, @@ -48,7 +48,7 @@ func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { AllowlistedSystemBaseLimit: infiniteBaseLimit, AllowlistedTransientBaseLimit: infiniteBaseLimit, - // Keep it simple by not having Service, ServicePeer, Protocol, ProtocolPeer, Peer, Conn, or Stream limits. + // Keep it simple by not having Service, ServicePeer, Protocol, ProtocolPeer, Conn, or Stream limits. 
ServiceBaseLimit: infiniteBaseLimit, ServicePeerBaseLimit: infiniteBaseLimit, ProtocolBaseLimit: infiniteBaseLimit, @@ -60,9 +60,9 @@ func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { PeerBaseLimit: rcmgr.BaseLimit{ Streams: math.MaxInt, StreamsOutbound: math.MaxInt, - StreamsInbound: 1, + StreamsInbound: rcmgr.DefaultLimits.PeerBaseLimit.StreamsInbound, Conns: math.MaxInt, - ConnsInbound: 1, + ConnsInbound: rcmgr.DefaultLimits.PeerBaseLimit.ConnsInbound, ConnsOutbound: math.MaxInt, FD: rcmgr.DefaultLimits.PeerBaseLimit.FD, Memory: rcmgr.DefaultLimits.PeerBaseLimit.Memory, diff --git a/core/node/libp2p/rcmgr_logging.go b/core/node/libp2p/rcmgr_logging.go index 57be9205b8e..66e22c64a29 100644 --- a/core/node/libp2p/rcmgr_logging.go +++ b/core/node/libp2p/rcmgr_logging.go @@ -52,7 +52,11 @@ func (n *loggingResourceManager) start(ctx context.Context) { for e, count := range errs { n.previousErrors = true - n.logger.Errorf("Resource limits were exceeded %d times with error %q. Consider inspecting logs and raising the resource manager limits. Documentation: https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmresourcemgr", count, e) + n.logger.Errorf("Resource limits were exceeded %d times with error %q.", count, e) + } + + if len(errs) != 0 { + n.logger.Errorf("Consider inspecting logs and raising the resource manager limits. 
Documentation: https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmresourcemgr") } if len(errs) == 0 && n.previousErrors { From d2b8da116ad8c154be7daf5e6f55b2c10eb10ff9 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 11:10:23 +0100 Subject: [PATCH 06/14] Go mod tidy Signed-off-by: Antonio Navarro Perez --- docs/examples/kubo-as-a-library/go.mod | 4 ---- docs/examples/kubo-as-a-library/go.sum | 8 -------- go.mod | 4 ---- go.sum | 8 -------- 4 files changed, 24 deletions(-) diff --git a/docs/examples/kubo-as-a-library/go.mod b/docs/examples/kubo-as-a-library/go.mod index de0527efd8d..ace69ac94cf 100644 --- a/docs/examples/kubo-as-a-library/go.mod +++ b/docs/examples/kubo-as-a-library/go.mod @@ -178,10 +178,6 @@ require ( github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/stretchr/objx v0.4.0 // indirect github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 // indirect - github.com/tidwall/gjson v1.14.0 // indirect - github.com/tidwall/match v1.1.1 // indirect - github.com/tidwall/pretty v1.2.0 // indirect - github.com/wI2L/jsondiff v0.2.0 // indirect github.com/whyrusleeping/base32 v0.0.0-20170828182744-c30ac30633cc // indirect github.com/whyrusleeping/cbor-gen v0.0.0-20210219115102-f37d292932f2 // indirect github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f // indirect diff --git a/docs/examples/kubo-as-a-library/go.sum b/docs/examples/kubo-as-a-library/go.sum index 5345acee3b8..d774b0831c6 100644 --- a/docs/examples/kubo-as-a-library/go.sum +++ b/docs/examples/kubo-as-a-library/go.sum @@ -1533,12 +1533,6 @@ github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70 github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/texttheater/golang-levenshtein 
v0.0.0-20180516184445-d188e65d659e/go.mod h1:XDKHRm5ThF8YJjx001LtgelzsoaEcvnA7lVWz9EeX3g= -github.com/tidwall/gjson v1.14.0 h1:6aeJ0bzojgWLa82gDQHcx3S0Lr/O51I9bJ5nv6JFx5w= -github.com/tidwall/gjson v1.14.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= -github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= -github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= -github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ= @@ -1552,8 +1546,6 @@ github.com/urfave/cli/v2 v2.0.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2 github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= -github.com/wI2L/jsondiff v0.2.0 h1:dE00WemBa1uCjrzQUUTE/17I6m5qAaN0EMFOg2Ynr/k= -github.com/wI2L/jsondiff v0.2.0/go.mod h1:axTcwtBkY4TsKuV+RgoMhHyHKKFRI6nnjRLi8LLYQnA= github.com/wangjia184/sortedset v0.0.0-20160527075905-f5d03557ba30/go.mod h1:YkocrP2K2tcw938x9gCOmT5G5eCD6jsTz0SZuyAqwIE= github.com/warpfork/go-testmark v0.3.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= github.com/warpfork/go-testmark v0.9.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= diff --git a/go.mod b/go.mod index 43688f8bfe0..b9edd13050f 100644 --- a/go.mod +++ b/go.mod @@ -94,7 +94,6 @@ require ( github.com/prometheus/client_golang v1.13.0 github.com/stretchr/testify v1.8.0 github.com/syndtr/goleveldb v1.0.0 - 
github.com/wI2L/jsondiff v0.2.0 github.com/whyrusleeping/go-sysinfo v0.0.0-20190219211824-4a357d4b90b1 github.com/whyrusleeping/multiaddr-filter v0.0.0-20160516205228-e903e4adabd7 go.opencensus.io v0.23.0 @@ -222,9 +221,6 @@ require ( github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e // indirect - github.com/tidwall/gjson v1.14.0 // indirect - github.com/tidwall/match v1.1.1 // indirect - github.com/tidwall/pretty v1.2.0 // indirect github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb // indirect github.com/whyrusleeping/base32 v0.0.0-20170828182744-c30ac30633cc // indirect github.com/whyrusleeping/cbor-gen v0.0.0-20210219115102-f37d292932f2 // indirect diff --git a/go.sum b/go.sum index 348263ffefe..bb54932a86f 100644 --- a/go.sum +++ b/go.sum @@ -1508,12 +1508,6 @@ github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpP github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e h1:T5PdfK/M1xyrHwynxMIVMWLS7f/qHwfslZphxtGnw7s= github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e/go.mod h1:XDKHRm5ThF8YJjx001LtgelzsoaEcvnA7lVWz9EeX3g= -github.com/tidwall/gjson v1.14.0 h1:6aeJ0bzojgWLa82gDQHcx3S0Lr/O51I9bJ5nv6JFx5w= -github.com/tidwall/gjson v1.14.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= -github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= -github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= -github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tj/assert v0.0.3 h1:Df/BlaZ20mq6kuai7f5z2TvPFiwC3xaWJSDQNiIS3Rk= 
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ= @@ -1528,8 +1522,6 @@ github.com/urfave/cli/v2 v2.0.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2 github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= -github.com/wI2L/jsondiff v0.2.0 h1:dE00WemBa1uCjrzQUUTE/17I6m5qAaN0EMFOg2Ynr/k= -github.com/wI2L/jsondiff v0.2.0/go.mod h1:axTcwtBkY4TsKuV+RgoMhHyHKKFRI6nnjRLi8LLYQnA= github.com/wangjia184/sortedset v0.0.0-20160527075905-f5d03557ba30/go.mod h1:YkocrP2K2tcw938x9gCOmT5G5eCD6jsTz0SZuyAqwIE= github.com/warpfork/go-testmark v0.3.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= github.com/warpfork/go-testmark v0.9.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= From c8d28b38e53f9975f81e364e116349546f60c2fb Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 11:35:51 +0100 Subject: [PATCH 07/14] Enable ResourceManager by default. 
--- core/node/libp2p/rcmgr.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index 3e4c0382110..eab49deebcd 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -35,7 +35,7 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { var manager network.ResourceManager var opts Libp2pOpts - enabled := cfg.ResourceMgr.Enabled.WithDefault(false) + enabled := cfg.ResourceMgr.Enabled.WithDefault(true) // ENV overrides Config (if present) switch os.Getenv("LIBP2P_RCMGR") { From 1f096080cdf4d2bd0a7b7f2548c7e815e0e58764 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 11:38:16 +0100 Subject: [PATCH 08/14] Fix rcmgr logging test Signed-off-by: Antonio Navarro Perez --- core/node/libp2p/rcmgr_logging_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/node/libp2p/rcmgr_logging_test.go b/core/node/libp2p/rcmgr_logging_test.go index 5be4276e5ad..3521e5314ea 100644 --- a/core/node/libp2p/rcmgr_logging_test.go +++ b/core/node/libp2p/rcmgr_logging_test.go @@ -55,7 +55,7 @@ func TestLoggingResourceManager(t *testing.T) { if oLogs.Len() == 0 { continue } - require.Equal(t, "Resource limits were exceeded 2 times with error \"system: cannot reserve inbound connection: resource limit exceeded\". Consider inspecting logs and raising the resource manager limits. 
Documentation: https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmresourcemgr", oLogs.All()[0].Message) + require.Equal(t, "Resource limits were exceeded 2 times with error \"system: cannot reserve inbound connection: resource limit exceeded\".", oLogs.All()[0].Message) return } } From 3fc3d9905e831b1763f25462892fffd406fe7f76 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 13:20:21 +0100 Subject: [PATCH 09/14] Fix shaness tests Signed-off-by: Antonio Navarro Perez --- core/node/libp2p/rcmgr_defaults.go | 41 +++++++++++++++++------------- test/sharness/t0119-prometheus.sh | 4 +++ test/sharness/t0139-swarm-rcmgr.sh | 6 ++++- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index b560c172c36..8dff7e0a43c 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -8,15 +8,22 @@ import ( rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" ) +// We are doing some magic when parsing config files (we are using a map[string]interface{} to compare config files). +// When you don't have a type the JSON Parse function cast numbers to float64 by default, +// losing precision when writing the final number. So if we use math.MaxInt as our infinite number, +// after writing the config file we will have 9223372036854776000 instead of 9223372036854775807, +// making the parsing process fail. 
+const bigEnough = math.MaxInt / 2 + var infiniteBaseLimit = rcmgr.BaseLimit{ - Streams: math.MaxInt, - StreamsInbound: math.MaxInt, - StreamsOutbound: math.MaxInt, - Conns: math.MaxInt, - ConnsInbound: math.MaxInt, - ConnsOutbound: math.MaxInt, - FD: math.MaxInt, - Memory: math.MaxInt, + Streams: bigEnough, + StreamsInbound: bigEnough, + StreamsOutbound: bigEnough, + Conns: bigEnough, + ConnsInbound: bigEnough, + ConnsOutbound: bigEnough, + FD: bigEnough, + Memory: bigEnough, } // This file defines implicit limit defaults used when Swarm.ResourceMgr.Enabled @@ -29,14 +36,14 @@ func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { Memory: rcmgr.DefaultLimits.SystemBaseLimit.Memory, FD: rcmgr.DefaultLimits.SystemBaseLimit.FD, - Conns: math.MaxInt, // just limit on the inbound + Conns: bigEnough, // just limit on the inbound ConnsInbound: rcmgr.DefaultLimits.SystemBaseLimit.ConnsInbound, // same as libp2p default - ConnsOutbound: math.MaxInt, + ConnsOutbound: bigEnough, // Don't limit streams. Rely on peer and transient limits. - Streams: math.MaxInt, - StreamsInbound: math.MaxInt, - StreamsOutbound: math.MaxInt, + Streams: bigEnough, + StreamsInbound: bigEnough, + StreamsOutbound: bigEnough, }, // Just go with what libp2p does @@ -58,12 +65,12 @@ func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { // Limit connections per peer. Really important to mitigate flooding attacks from a peer. 
PeerBaseLimit: rcmgr.BaseLimit{ - Streams: math.MaxInt, - StreamsOutbound: math.MaxInt, + Streams: bigEnough, + StreamsOutbound: bigEnough, StreamsInbound: rcmgr.DefaultLimits.PeerBaseLimit.StreamsInbound, - Conns: math.MaxInt, + Conns: bigEnough, ConnsInbound: rcmgr.DefaultLimits.PeerBaseLimit.ConnsInbound, - ConnsOutbound: math.MaxInt, + ConnsOutbound: bigEnough, FD: rcmgr.DefaultLimits.PeerBaseLimit.FD, Memory: rcmgr.DefaultLimits.PeerBaseLimit.Memory, }, diff --git a/test/sharness/t0119-prometheus.sh b/test/sharness/t0119-prometheus.sh index cd734673571..e96b8b96f8a 100755 --- a/test/sharness/t0119-prometheus.sh +++ b/test/sharness/t0119-prometheus.sh @@ -10,6 +10,10 @@ test_description="Test prometheus metrics are exposed correctly" test_init_ipfs +test_expect_success "enable ResourceMgr in the config" ' + ipfs config --json Swarm.ResourceMgr.Enabled false +' + test_launch_ipfs_daemon test_expect_success "collect metrics" ' diff --git a/test/sharness/t0139-swarm-rcmgr.sh b/test/sharness/t0139-swarm-rcmgr.sh index f4915791a63..290b1bdd3f0 100755 --- a/test/sharness/t0139-swarm-rcmgr.sh +++ b/test/sharness/t0139-swarm-rcmgr.sh @@ -6,7 +6,11 @@ test_description="Test ipfs swarm ResourceMgr config and commands" test_init_ipfs -# test correct behavior when resource manager is disabled (default behavior) +test_expect_success 'Disable resource manager' ' + ipfs config --bool Swarm.ResourceMgr.Enabled false +' + +# test correct behavior when resource manager is disabled test_launch_ipfs_daemon test_expect_success 'Swarm limit should fail since RM is disabled' ' From fb881babeff21d664c59d2451dc61f1364bce9b8 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 19:44:26 +0100 Subject: [PATCH 10/14] Requested changes. 
Signed-off-by: Antonio Navarro Perez --- config/swarm.go | 4 + core/node/libp2p/rcmgr.go | 100 +++++++++-------- core/node/libp2p/rcmgr_defaults.go | 172 +++++++++++++++++++++++++---- docs/config.md | 33 +++--- test/sharness/t0139-swarm-rcmgr.sh | 12 +- 5 files changed, 233 insertions(+), 88 deletions(-) diff --git a/config/swarm.go b/config/swarm.go index 01181f36ee5..23def69d51e 100644 --- a/config/swarm.go +++ b/config/swarm.go @@ -143,6 +143,10 @@ type ResourceMgr struct { // Enables the Network Resource Manager feature, default to on. Enabled Flag `json:",omitempty"` Limits *rcmgr.LimitConfig `json:",omitempty"` + + MaxMemory int64 `json:",omitempty"` + MaxFileDescriptors int `json:",omitempty"` + // A list of multiaddrs that can bypass normal system limits (but are still // limited by the allowlist scope). Convenience config around // https://pkg.go.dev/github.com/libp2p/go-libp2p/p2p/host/resource-manager#Allowlist.Add diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index eab49deebcd..66c97807a03 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -53,7 +53,7 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { return nil, opts, fmt.Errorf("opening IPFS_PATH: %w", err) } - limits := adjustedDefaultLimits(cfg) + limits := createDefaultLimitConfig(cfg) if cfg.ResourceMgr.Limits != nil { l := *cfg.ResourceMgr.Limits @@ -122,11 +122,11 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { } type NetStatOut struct { - System *network.ScopeStat `json:",omitempty"` - Transient *network.ScopeStat `json:",omitempty"` - Services map[string]network.ScopeStat `json:",omitempty"` - Protocols map[string]network.ScopeStat `json:",omitempty"` - Peers map[string]network.ScopeStat `json:",omitempty"` + System *rcmgr.BaseLimit `json:",omitempty"` + Transient *rcmgr.BaseLimit `json:",omitempty"` + Services map[string]rcmgr.BaseLimit `json:",omitempty"` + Protocols map[string]rcmgr.BaseLimit `json:",omitempty"` + Peers 
map[string]rcmgr.BaseLimit `json:",omitempty"` } func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStatOut, error) { @@ -145,33 +145,33 @@ func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStat } stat := rapi.Stat() - result.System = compareScopes(&stat.System, limits.System, percentage) - result.Transient = compareScopes(&stat.Transient, limits.Transient, percentage) + result.System = compareLimits(scopeToLimit(&stat.System), limits.System, percentage) + result.Transient = compareLimits(scopeToLimit(&stat.Transient), limits.Transient, percentage) if len(stat.Services) > 0 { - result.Services = make(map[string]network.ScopeStat, len(stat.Services)) + result.Services = make(map[string]rcmgr.BaseLimit, len(stat.Services)) for srv, stat := range stat.Services { ls := limits.Services[srv] - fstat := compareScopes(&stat, &ls, percentage) + fstat := compareLimits(scopeToLimit(&stat), &ls, percentage) if fstat != nil { result.Services[srv] = *fstat } } } if len(stat.Protocols) > 0 { - result.Protocols = make(map[string]network.ScopeStat, len(stat.Protocols)) + result.Protocols = make(map[string]rcmgr.BaseLimit, len(stat.Protocols)) for proto, stat := range stat.Protocols { ls := limits.Protocols[string(proto)] - fstat := compareScopes(&stat, &ls, percentage) + fstat := compareLimits(scopeToLimit(&stat), &ls, percentage) if fstat != nil { result.Protocols[string(proto)] = *fstat } } } if len(stat.Peers) > 0 { - result.Peers = make(map[string]network.ScopeStat, len(stat.Peers)) + result.Peers = make(map[string]rcmgr.BaseLimit, len(stat.Peers)) for p, stat := range stat.Peers { ls := limits.Peers[p.Pretty()] - fstat := compareScopes(&stat, &ls, percentage) + fstat := compareLimits(scopeToLimit(&stat), &ls, percentage) if fstat != nil { result.Peers[p.Pretty()] = *fstat } @@ -183,7 +183,7 @@ func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStat case scope == config.ResourceMgrSystemScope: err = 
mgr.ViewSystem(func(s network.ResourceScope) error { stat := s.Stat() - result.System = &stat + result.System = scopeToLimit(&stat) return nil }) return result, err @@ -191,7 +191,7 @@ func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStat case scope == config.ResourceMgrTransientScope: err = mgr.ViewTransient(func(s network.ResourceScope) error { stat := s.Stat() - result.Transient = &stat + result.Transient = scopeToLimit(&stat) return nil }) return result, err @@ -200,8 +200,8 @@ func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStat svc := strings.TrimPrefix(scope, config.ResourceMgrServiceScopePrefix) err = mgr.ViewService(svc, func(s network.ServiceScope) error { stat := s.Stat() - result.Services = map[string]network.ScopeStat{ - svc: stat, + result.Services = map[string]rcmgr.BaseLimit{ + svc: *scopeToLimit(&stat), } return nil }) @@ -211,8 +211,8 @@ func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStat proto := strings.TrimPrefix(scope, config.ResourceMgrProtocolScopePrefix) err = mgr.ViewProtocol(protocol.ID(proto), func(s network.ProtocolScope) error { stat := s.Stat() - result.Protocols = map[string]network.ScopeStat{ - proto: stat, + result.Protocols = map[string]rcmgr.BaseLimit{ + proto: *scopeToLimit(&stat), } return nil }) @@ -226,8 +226,8 @@ func NetStat(mgr network.ResourceManager, scope string, percentage int) (NetStat } err = mgr.ViewPeer(pid, func(s network.PeerScope) error { stat := s.Stat() - result.Peers = map[string]network.ScopeStat{ - p: stat, + result.Peers = map[string]rcmgr.BaseLimit{ + p: *scopeToLimit(&stat), } return nil }) @@ -246,40 +246,48 @@ var scopes = []string{ config.ResourceMgrPeerScopePrefix, } -func limitToScope(l rcmgr.BaseLimit) *network.ScopeStat { - return &network.ScopeStat{ - NumStreamsInbound: l.StreamsInbound, - NumStreamsOutbound: l.StreamsOutbound, - NumConnsInbound: l.ConnsInbound, - NumConnsOutbound: l.ConnsOutbound, - NumFD: l.FD, 
- Memory: l.Memory, +func scopeToLimit(s *network.ScopeStat) *rcmgr.BaseLimit { + return &rcmgr.BaseLimit{ + Streams: s.NumStreamsInbound + s.NumStreamsOutbound, + StreamsInbound: s.NumStreamsInbound, + StreamsOutbound: s.NumStreamsOutbound, + Conns: s.NumConnsInbound + s.NumConnsOutbound, + ConnsInbound: s.NumConnsInbound, + ConnsOutbound: s.NumConnsOutbound, + FD: s.NumFD, + Memory: s.Memory, } } -// compareScopes copares stat and limit. +// compareLimits copares stat and limit. // If any of the stats value are equals or above the specified percentage, // stat object is returned. -func compareScopes(stat, limit *network.ScopeStat, percentage int) *network.ScopeStat { +func compareLimits(stat, limit *rcmgr.BaseLimit, percentage int) *rcmgr.BaseLimit { if stat == nil || limit == nil { return nil } if abovePercentage(int(stat.Memory), int(limit.Memory), percentage) { return stat } - if abovePercentage(stat.NumConnsInbound, limit.NumConnsInbound, percentage) { + if abovePercentage(stat.ConnsInbound, limit.ConnsInbound, percentage) { return stat } - if abovePercentage(stat.NumConnsOutbound, limit.NumConnsOutbound, percentage) { + if abovePercentage(stat.ConnsOutbound, limit.ConnsOutbound, percentage) { return stat } - if abovePercentage(stat.NumFD, limit.NumFD, percentage) { + if abovePercentage(stat.Conns, limit.Conns, percentage) { return stat } - if abovePercentage(stat.NumStreamsInbound, limit.NumStreamsInbound, percentage) { + if abovePercentage(stat.FD, limit.FD, percentage) { return stat } - if abovePercentage(stat.NumStreamsOutbound, limit.NumStreamsOutbound, percentage) { + if abovePercentage(stat.StreamsInbound, limit.StreamsInbound, percentage) { + return stat + } + if abovePercentage(stat.StreamsOutbound, limit.StreamsOutbound, percentage) { + return stat + } + if abovePercentage(stat.Streams, limit.Streams, percentage) { return stat } @@ -312,41 +320,41 @@ func NetLimitAll(mgr network.ResourceManager) (*NetStatOut, error) { if err != nil { return nil, err 
} - result.System = limitToScope(s) + result.System = &s case config.ResourceMgrTransientScope: s, err := NetLimit(mgr, config.ResourceMgrSystemScope) if err != nil { return nil, err } - result.Transient = limitToScope(s) + result.Transient = &s case config.ResourceMgrServiceScopePrefix: - result.Services = make(map[string]network.ScopeStat) + result.Services = make(map[string]rcmgr.BaseLimit) for _, serv := range lister.ListServices() { s, err := NetLimit(mgr, config.ResourceMgrServiceScopePrefix+serv) if err != nil { return nil, err } - result.Services[serv] = *limitToScope(s) + result.Services[serv] = s } case config.ResourceMgrProtocolScopePrefix: - result.Protocols = make(map[string]network.ScopeStat) + result.Protocols = make(map[string]rcmgr.BaseLimit) for _, prot := range lister.ListProtocols() { ps := string(prot) s, err := NetLimit(mgr, config.ResourceMgrProtocolScopePrefix+ps) if err != nil { return nil, err } - result.Protocols[ps] = *limitToScope(s) + result.Protocols[ps] = s } case config.ResourceMgrPeerScopePrefix: - result.Peers = make(map[string]network.ScopeStat) + result.Peers = make(map[string]rcmgr.BaseLimit) for _, peer := range lister.ListPeers() { ps := peer.Pretty() s, err := NetLimit(mgr, config.ResourceMgrPeerScopePrefix+ps) if err != nil { return nil, err } - result.Peers[ps] = *limitToScope(s) + result.Peers[ps] = s } } } @@ -503,7 +511,7 @@ func NetResetLimit(mgr network.ResourceManager, repo repo.Repo, scope string) (r return result, fmt.Errorf("reading config to reset limit: %w", err) } - defaults := adjustedDefaultLimits(cfg.Swarm) + defaults := createDefaultLimitConfig(cfg.Swarm) if cfg.Swarm.ResourceMgr.Limits == nil { cfg.Swarm.ResourceMgr.Limits = &rcmgr.LimitConfig{} diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index 8dff7e0a43c..367ad3a57f5 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -6,6 +6,8 @@ import ( "github.com/ipfs/kubo/config" 
"github.com/libp2p/go-libp2p" rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" + "github.com/pbnjay/memory" + "golang.org/x/sys/unix" ) // We are doing some magic when parsing config files (we are using a map[string]interface{} to compare config files). @@ -26,25 +28,107 @@ var infiniteBaseLimit = rcmgr.BaseLimit{ Memory: bigEnough, } +var noLimitIncrease = rcmgr.BaseLimitIncrease{ + ConnsInbound: 0, + ConnsOutbound: 0, + Conns: 0, + StreamsInbound: 0, + StreamsOutbound: 0, + Streams: 0, + Memory: 0, + FDFraction: 0, +} + // This file defines implicit limit defaults used when Swarm.ResourceMgr.Enabled -// adjustedDefaultLimits allows for tweaking defaults based on external factors, -// such as values in Swarm.ConnMgr.HiWater config. -func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { - defaultLimits := rcmgr.ScalingLimitConfig{ +// createDefaultLimitConfig creates LimitConfig to pass to libp2p's resource manager. +// libp2p's resource manager provides tremendous flexibility but also adds a lot of complexity. +// The intent of the default config here is to provide good defaults, +// and where the defaults aren't good enough, +// to expose a good set of higher-level "knobs" to users to satisfy most use cases +// without requiring users to wade into all the intricacies of libp2p's resource manager. +// +// The inputs one can specify in SwarmConfig are: +// - cfg.ResourceMgr.MaxMemory: This is the max amount of memory in bytes to allow libp2p to use. +// libp2p's resource manager will prevent additional resource creation while this limit is hit. +// If this value isn't specified, 1/8th of the total system memory is used. +// - cfg.ResourceMgr.MaxFileDescriptors: This is the maximum number of file descriptors to allow libp2p to use. +// libp2p's resource manager will prevent additional file descriptor consumption while this limit is hit. +// If this value isn't specified, the maximum between 1/2 of system FD limit and 4096 is used. 
+// - Swarm.ConnMgr.HighWater: If a connection manager is specified, libp2p's resource manager +// will allow 2x more connections than the HighWater mark +// so the connection manager has "space and time" to close "least useful" connections. +// +// With these inputs defined, limits are created at the system, transient, and peer scopes. +// Other scopes are ignored (by being set to infinity). +// The reason these scopes are chosen is because: +// - system - This gives us the coarse-grained control we want so we can reason about the system as a whole. +// It is the backstop, and allows us to reason about resource consumption more easily +// since we don't have to think about the interaction of many other scopes. +// - transient - Limiting connections that are in process of being established provides backpressure so not too much work queues up. +// - peer - The peer scope doesn't protect us against intentional DoS attacks. +// It's just as easy for an attacker to send 100 requests/second with 1 peerId vs. 10 requests/second with 10 peers. +// We are reliant on the system scope for protection here in the malicious case. +// The reason for having a peer scope is to protect against unintentional DoS attacks +// (e.g., bug in a peer which is causing it to "misbehave"). +// In the unintentional case, we want to make sure a "misbehaving" node doesn't consume more resources than necessary. +// +// Within these scopes, limits are just set on memory, FD, and inbound connections/streams. +// Limits are set based on the inputs above. +// We trust this node to behave properly and thus ignore outbound connection/stream limits. +// We apply any limits that libp2p has for its protocols/services +// since we assume libp2p knows best here. +// +// This leaves 3 levels of resource management protection: +// 1. The user who does nothing and uses defaults - In this case they get some sane defaults +// based on the amount of memory and file descriptors their system has.
+// This should protect the node from many attacks. +// 2. Slightly more advanced user - They can tweak the above by passing in config on +// maxMemory, maxFD, or maxConns with Swarm.HighWater.ConnMgr. +// 3. Power user - They specify all the limits they want set via Swarm.ResourceMgr.Limits +// and we don't do any defaults/overrides. We pass that config blindly into libp2p resource manager. +func createDefaultLimitConfig(cfg config.SwarmConfig) rcmgr.LimitConfig { + + if cfg.ResourceMgr.MaxMemory == 0 { + cfg.ResourceMgr.MaxMemory = int64(memory.TotalMemory()) / 8 + } + + if cfg.ResourceMgr.MaxFileDescriptors == 0 { + cfg.ResourceMgr.MaxFileDescriptors = getNumFDs() / 2 + } + + scalingLimitConfig := rcmgr.ScalingLimitConfig{ SystemBaseLimit: rcmgr.BaseLimit{ - Memory: rcmgr.DefaultLimits.SystemBaseLimit.Memory, - FD: rcmgr.DefaultLimits.SystemBaseLimit.FD, + Memory: cfg.ResourceMgr.MaxMemory, + FD: cfg.ResourceMgr.MaxFileDescriptors, - Conns: bigEnough, // just limit on the inbound + // By default, we just limit connections on the inbound side. + // Note that the limit gets adjusted below if "cfg.ConnMgr.HighWater" is set. + Conns: bigEnough, ConnsInbound: rcmgr.DefaultLimits.SystemBaseLimit.ConnsInbound, // same as libp2p default ConnsOutbound: bigEnough, - // Don't limit streams. Rely on peer and transient limits. + // We limit streams since they not only take up memory and CPU. + // The Memory limit protects us on the memory side, + // but a StreamsInbound limit helps protect against unbound CPU consumption from stream processing. Streams: bigEnough, - StreamsInbound: bigEnough, + StreamsInbound: rcmgr.DefaultLimits.SystemBaseLimit.StreamsInbound, StreamsOutbound: bigEnough, }, + // Most limits don't see an increase because they're already infinite/bigEnough or at their max value. + // The values that should scale based on the amount of memory allocated to libp2p need to increase accordingly. 
+ SystemLimitIncrease: rcmgr.BaseLimitIncrease{ + Memory: rcmgr.DefaultLimits.SystemLimitIncrease.Memory, + FDFraction: rcmgr.DefaultLimits.SystemLimitIncrease.FDFraction, + + Conns: 0, + ConnsInbound: rcmgr.DefaultLimits.SystemLimitIncrease.ConnsInbound, + ConnsOutbound: 0, + + Streams: 0, + StreamsInbound: rcmgr.DefaultLimits.SystemLimitIncrease.StreamsInbound, + StreamsOutbound: 0, + }, // Just go with what libp2p does TransientBaseLimit: rcmgr.DefaultLimits.TransientBaseLimit, @@ -52,33 +136,64 @@ func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { // Lets get out of the way of the allow list functionality. // If someone specified "Swarm.ResourceMgr.Allowlist" we should let it go through. - AllowlistedSystemBaseLimit: infiniteBaseLimit, - AllowlistedTransientBaseLimit: infiniteBaseLimit, + AllowlistedSystemBaseLimit: infiniteBaseLimit, + AllowlistedSystemLimitIncrease: noLimitIncrease, + + AllowlistedTransientBaseLimit: infiniteBaseLimit, + AllowlistedTransientLimitIncrease: noLimitIncrease, // Keep it simple by not having Service, ServicePeer, Protocol, ProtocolPeer, Conn, or Stream limits. - ServiceBaseLimit: infiniteBaseLimit, - ServicePeerBaseLimit: infiniteBaseLimit, + ServiceBaseLimit: infiniteBaseLimit, + ServiceLimitIncrease: noLimitIncrease, + + ServicePeerBaseLimit: infiniteBaseLimit, + ServicePeerLimitIncrease: noLimitIncrease, + ProtocolBaseLimit: infiniteBaseLimit, - ProtocolPeerBaseLimit: infiniteBaseLimit, - ConnBaseLimit: infiniteBaseLimit, - StreamBaseLimit: infiniteBaseLimit, + ProtocolLimitIncrease: noLimitIncrease, + + ProtocolPeerBaseLimit: infiniteBaseLimit, + ProtocolPeerLimitIncrease: noLimitIncrease, + + ConnBaseLimit: infiniteBaseLimit, + ConnLimitIncrease: noLimitIncrease, - // Limit connections per peer. Really important to mitigate flooding attacks from a peer. + StreamBaseLimit: infiniteBaseLimit, + StreamLimitIncrease: noLimitIncrease, + + // Limit the resources consumed by a peer. 
+ // This doesn't protect us against intentional DoS attacks since an attacker can easily spin up multiple peers. + // We specify this limit against unintentional DoS attacks (e.g., a peer has a bug and is sending too much traffic intentionally). + // In that case we want to keep that peer's resource consumption contained. + // To keep this simple, we only constrain inbound connections and connections. PeerBaseLimit: rcmgr.BaseLimit{ - Streams: bigEnough, - StreamsOutbound: bigEnough, - StreamsInbound: rcmgr.DefaultLimits.PeerBaseLimit.StreamsInbound, + Memory: bigEnough, + FD: bigEnough, Conns: bigEnough, ConnsInbound: rcmgr.DefaultLimits.PeerBaseLimit.ConnsInbound, ConnsOutbound: bigEnough, - FD: rcmgr.DefaultLimits.PeerBaseLimit.FD, - Memory: rcmgr.DefaultLimits.PeerBaseLimit.Memory, + Streams: bigEnough, + StreamsInbound: rcmgr.DefaultLimits.PeerBaseLimit.StreamsInbound, + StreamsOutbound: bigEnough, + }, + // Most limits don't see an increase because they're already infinite/bigEnough. + // The values that should scale based on the amount of memory allocated to libp2p need to increase accordingly. + PeerLimitIncrease: rcmgr.BaseLimitIncrease{ + Memory: 0, + FDFraction: 0, + Conns: 0, + ConnsInbound: rcmgr.DefaultLimits.PeerLimitIncrease.ConnsInbound, + ConnsOutbound: 0, + Streams: 0, + StreamsInbound: rcmgr.DefaultLimits.PeerLimitIncrease.StreamsInbound, + StreamsOutbound: 0, }, } - libp2p.SetDefaultServiceLimits(&defaultLimits) + // Whatever limits libp2p has specifically tuned for its protocols/services we'll apply. 
+ libp2p.SetDefaultServiceLimits(&scalingLimitConfig) - defaultLimitConfig := defaultLimits.AutoScale() + defaultLimitConfig := scalingLimitConfig.Scale(cfg.ResourceMgr.MaxMemory, cfg.ResourceMgr.MaxFileDescriptors) // If a high water mark is set: if cfg.ConnMgr.Type == "basic" { @@ -89,3 +204,12 @@ func adjustedDefaultLimits(cfg config.SwarmConfig) rcmgr.LimitConfig { return defaultLimitConfig } + +func getNumFDs() int { + var l unix.Rlimit + if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &l); err != nil { + log.Errorw("failed to get fd limit", "error", err) + return 0 + } + return int(l.Cur) +} diff --git a/docs/config.md b/docs/config.md index 844109a799b..8ecae4ccf06 100644 --- a/docs/config.md +++ b/docs/config.md @@ -141,6 +141,8 @@ config file at runtime. - [`Swarm.ConnMgr.GracePeriod`](#swarmconnmgrgraceperiod) - [`Swarm.ResourceMgr`](#swarmresourcemgr) - [`Swarm.ResourceMgr.Enabled`](#swarmresourcemgrenabled) + - [`Swarm.ResourceMgr.MaxMemory`](#swarmresourcemgrmaxmemory) + - [`Swarm.ResourceMgr.MaxFileDescriptors`](#swarmresourcemgrmaxfiledescriptors) - [`Swarm.ResourceMgr.Limits`](#swarmresourcemgrlimits) - [`Swarm.ResourceMgr.Allowlist`](#swarmresourcemgrallowlist) - [`Swarm.Transports`](#swarmtransports) @@ -1800,35 +1802,40 @@ Type: `duration` ### `Swarm.ResourceMgr` -**EXPERIMENTAL: `Swarm.ResourceMgr` configuration will change in future release** - The [libp2p Network Resource Manager](https://github.com/libp2p/go-libp2p-resource-manager#readme) allows setting limits per a scope, and tracking recource usage over time. #### `Swarm.ResourceMgr.Enabled` -**EXPERIMENTAL: `Swarm.ResourceMgr` is in active development, enable it only if you want to provide maintainers with feedback** - - Enables the libp2p Network Resource Manager and auguments the default limits using user-defined ones in `Swarm.ResourceMgr.Limits` (if present). 
Various `*rcmgr_*` metrics can be accessed as the prometheus endpoint at `{Addresses.API}/debug/metrics/prometheus` (default: `http://127.0.0.1:5001/debug/metrics/prometheus`) -Default: `false` +Default: `true` Type: `flag` -#### `Swarm.ResourceMgr.Limits` +#### `Swarm.ResourceMgr.MaxMemory` -**EXPERIMENTAL: `Swarm.ResourceMgr.Limits` configuration will change in future release, exposed here only for convenience** +Define the maximum amount of memory used by libp2p. -Map of resource limits [per scope](https://github.com/libp2p/go-libp2p-resource-manager#resource-scopes). +Default: `[TOTAL_MEMORY]/8` +Type: `integer64` + +#### `Swarm.ResourceMgr.MaxFileDescriptors` + +Define the maximum amount of file descriptors used by libp2p. + +Default `[TOTAL_FILE_DESCRIPTORS]/2` +Type: `integer` + +#### `Swarm.ResourceMgr.Limits` -The map supports fields from [`BasicLimiterConfig`](https://github.com/libp2p/go-libp2p-resource-manager/blob/v0.3.0/limit_config.go#L165-L185) -struct from [go-libp2p-resource-manager](https://github.com/libp2p/go-libp2p-resource-manager#readme). +Map of resource limits [per scope](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#resource-scopes). -**Example: (format may change in future release)** +The map supports fields from [`ScalingLimitConfig`](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit_defaults.go#L21-L59) +struct from [go-libp2p-resource-manager](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#readme). ```json { @@ -1865,7 +1872,7 @@ Type: `object[string->object]` #### `Swarm.ResourceMgr.Allowlist` A list of multiaddrs that can bypass normal system limits (but are still limited by the allowlist scope). -Convenience config around [go-libp2p-resource-manager#Allowlist.Add](https://pkg.go.dev/github.com/libp2p/go-libp2p-resource-manager#Allowlist.Add). 
+Convenience config around [go-libp2p-resource-manager#Allowlist.Add](https://pkg.go.dev/github.com/libp2p/go-libp2p/p2p/host/resource-manager#Allowlist.Add). Default: `[]` diff --git a/test/sharness/t0139-swarm-rcmgr.sh b/test/sharness/t0139-swarm-rcmgr.sh index 290b1bdd3f0..66d308ede61 100755 --- a/test/sharness/t0139-swarm-rcmgr.sh +++ b/test/sharness/t0139-swarm-rcmgr.sh @@ -81,11 +81,13 @@ test_expect_success 'connected: swarm stats all working properly' ' test_expect_success 'ResourceMgr enabled: swarm stats' ' ipfs swarm stats all --enc=json | tee json && jq -e .System.Memory < json && - jq -e .System.NumConnsInbound < json && - jq -e .System.NumConnsOutbound < json && - jq -e .System.NumFD < json && - jq -e .System.NumStreamsInbound < json && - jq -e .System.NumStreamsOutbound < json && + jq -e .System.Conns < json && + jq -e .System.ConnsInbound < json && + jq -e .System.ConnsOutbound < json && + jq -e .System.FD < json && + jq -e .System.StreamsInbound < json && + jq -e .System.StreamsOutbound < json && + jq -e .System.Streams < json && jq -e .Transient.Memory < json ' From d844df9712b69f34aa0416ce754438b5e3a7e81f Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 19:53:22 +0100 Subject: [PATCH 11/14] go mod tidy Signed-off-by: Antonio Navarro Perez --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b9edd13050f..a24d78589d7 100644 --- a/go.mod +++ b/go.mod @@ -90,6 +90,7 @@ require ( github.com/multiformats/go-multicodec v0.7.0 github.com/multiformats/go-multihash v0.2.1 github.com/opentracing/opentracing-go v1.2.0 + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.13.0 github.com/stretchr/testify v1.8.0 @@ -209,7 +210,6 @@ require ( github.com/onsi/ginkgo v1.16.5 // indirect github.com/opencontainers/runtime-spec v1.0.2 // indirect github.com/openzipkin/zipkin-go v0.4.0 // indirect - 
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/polydawn/refmt v0.0.0-20201211092308-30ac6d18308e // indirect github.com/prometheus/client_model v0.2.0 // indirect From e7c9800db919ea74c1dd8bc64a1eca30a50baf99 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 9 Nov 2022 20:05:32 +0100 Subject: [PATCH 12/14] Use file descriptors only on unix. Signed-off-by: Antonio Navarro Perez --- core/node/libp2p/fd/sys_not_unix.go | 7 +++++++ core/node/libp2p/fd/sys_unix.go | 16 ++++++++++++++++ core/node/libp2p/rcmgr_defaults.go | 13 ++----------- 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 core/node/libp2p/fd/sys_not_unix.go create mode 100644 core/node/libp2p/fd/sys_unix.go diff --git a/core/node/libp2p/fd/sys_not_unix.go b/core/node/libp2p/fd/sys_not_unix.go new file mode 100644 index 00000000000..c857987480d --- /dev/null +++ b/core/node/libp2p/fd/sys_not_unix.go @@ -0,0 +1,7 @@ +//go:build !linux && !darwin && !windows + +package fd + +func GetNumFDs() int { + return 0 +} diff --git a/core/node/libp2p/fd/sys_unix.go b/core/node/libp2p/fd/sys_unix.go new file mode 100644 index 00000000000..5e417c0fa6d --- /dev/null +++ b/core/node/libp2p/fd/sys_unix.go @@ -0,0 +1,16 @@ +//go:build linux || darwin +// +build linux darwin + +package fd + +import ( + "golang.org/x/sys/unix" +) + +func GetNumFDs() int { + var l unix.Rlimit + if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &l); err != nil { + return 0 + } + return int(l.Cur) +} diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index 367ad3a57f5..c1c94000d26 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -4,10 +4,10 @@ import ( "math" "github.com/ipfs/kubo/config" + "github.com/ipfs/kubo/core/node/libp2p/fd" "github.com/libp2p/go-libp2p" rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" "github.com/pbnjay/memory" - 
"golang.org/x/sys/unix" ) // We are doing some magic when parsing config files (we are using a map[string]interface{} to compare config files). @@ -94,7 +94,7 @@ func createDefaultLimitConfig(cfg config.SwarmConfig) rcmgr.LimitConfig { } if cfg.ResourceMgr.MaxFileDescriptors == 0 { - cfg.ResourceMgr.MaxFileDescriptors = getNumFDs() / 2 + cfg.ResourceMgr.MaxFileDescriptors = fd.GetNumFDs() / 2 } scalingLimitConfig := rcmgr.ScalingLimitConfig{ @@ -204,12 +204,3 @@ func createDefaultLimitConfig(cfg config.SwarmConfig) rcmgr.LimitConfig { return defaultLimitConfig } - -func getNumFDs() int { - var l unix.Rlimit - if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &l); err != nil { - log.Errorw("failed to get fd limit", "error", err) - return 0 - } - return int(l.Cur) -} From f7303471384ea6938a78d6dc1d56b855703882ed Mon Sep 17 00:00:00 2001 From: Antonio Navarro Date: Thu, 10 Nov 2022 10:13:11 +0000 Subject: [PATCH 13/14] Requested changes Signed-off-by: Antonio Navarro --- config/swarm.go | 4 ++-- core/node/libp2p/rcmgr.go | 21 +++++++++++++-------- core/node/libp2p/rcmgr_defaults.go | 30 ++++++++++++++++-------------- core/node/libp2p/rcmgr_logging.go | 7 ------- docs/config.md | 14 ++++++++------ test/sharness/t0139-swarm-rcmgr.sh | 4 ++-- 6 files changed, 41 insertions(+), 39 deletions(-) diff --git a/config/swarm.go b/config/swarm.go index 23def69d51e..63119282772 100644 --- a/config/swarm.go +++ b/config/swarm.go @@ -144,8 +144,8 @@ type ResourceMgr struct { Enabled Flag `json:",omitempty"` Limits *rcmgr.LimitConfig `json:",omitempty"` - MaxMemory int64 `json:",omitempty"` - MaxFileDescriptors int `json:",omitempty"` + MaxMemory OptionalString `json:",omitempty"` + MaxFileDescriptors OptionalInteger `json:",omitempty"` // A list of multiaddrs that can bypass normal system limits (but are still // limited by the allowlist scope). 
Convenience config around diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index 66c97807a03..49c9d382399 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -9,10 +9,6 @@ import ( "github.com/benbjohnson/clock" logging "github.com/ipfs/go-log/v2" - config "github.com/ipfs/kubo/config" - "github.com/ipfs/kubo/core/node/helpers" - "github.com/ipfs/kubo/repo" - "github.com/libp2p/go-libp2p" "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" @@ -21,8 +17,11 @@ import ( rcmgrObs "github.com/libp2p/go-libp2p/p2p/host/resource-manager/obs" "github.com/multiformats/go-multiaddr" "go.opencensus.io/stats/view" - "go.uber.org/fx" + + config "github.com/ipfs/kubo/config" + "github.com/ipfs/kubo/core/node/helpers" + "github.com/ipfs/kubo/repo" ) const NetLimitDefaultFilename = "limit.json" @@ -53,7 +52,10 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { return nil, opts, fmt.Errorf("opening IPFS_PATH: %w", err) } - limits := createDefaultLimitConfig(cfg) + limits, err := createDefaultLimitConfig(cfg) + if err != nil { + return nil, opts, err + } if cfg.ResourceMgr.Limits != nil { l := *cfg.ResourceMgr.Limits @@ -259,7 +261,7 @@ func scopeToLimit(s *network.ScopeStat) *rcmgr.BaseLimit { } } -// compareLimits copares stat and limit. +// compareLimits compares stat and limit. // If any of the stats value are equals or above the specified percentage, // stat object is returned. 
func compareLimits(stat, limit *rcmgr.BaseLimit, percentage int) *rcmgr.BaseLimit { @@ -511,7 +513,10 @@ func NetResetLimit(mgr network.ResourceManager, repo repo.Repo, scope string) (r return result, fmt.Errorf("reading config to reset limit: %w", err) } - defaults := createDefaultLimitConfig(cfg.Swarm) + defaults, err := createDefaultLimitConfig(cfg.Swarm) + if err != nil { + return result, fmt.Errorf("creating default limit config: %w", err) + } if cfg.Swarm.ResourceMgr.Limits == nil { cfg.Swarm.ResourceMgr.Limits = &rcmgr.LimitConfig{} diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index c1c94000d26..3ff8b55dd26 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -3,11 +3,13 @@ package libp2p import ( "math" - "github.com/ipfs/kubo/config" - "github.com/ipfs/kubo/core/node/libp2p/fd" + "github.com/dustin/go-humanize" "github.com/libp2p/go-libp2p" rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" "github.com/pbnjay/memory" + + "github.com/ipfs/kubo/config" + "github.com/ipfs/kubo/core/node/libp2p/fd" ) // We are doing some magic when parsing config files (we are using a map[string]interface{} to compare config files). @@ -87,20 +89,20 @@ var noLimitIncrease = rcmgr.BaseLimitIncrease{ // maxMemory, maxFD, or maxConns with Swarm.HighWater.ConnMgr. // 3. Power user - They specify all the limits they want set via Swarm.ResourceMgr.Limits // and we don't do any defaults/overrides. We pass that config blindly into libp2p resource manager. 
-func createDefaultLimitConfig(cfg config.SwarmConfig) rcmgr.LimitConfig { - - if cfg.ResourceMgr.MaxMemory == 0 { - cfg.ResourceMgr.MaxMemory = int64(memory.TotalMemory()) / 8 +func createDefaultLimitConfig(cfg config.SwarmConfig) (rcmgr.LimitConfig, error) { + maxMemoryDefaultString := humanize.Bytes(uint64(memory.TotalMemory()) / 8) + maxMemoryString := cfg.ResourceMgr.MaxMemory.WithDefault(maxMemoryDefaultString) + maxMemory, err := humanize.ParseBytes(maxMemoryString) + if err != nil { + return rcmgr.LimitConfig{}, err } - if cfg.ResourceMgr.MaxFileDescriptors == 0 { - cfg.ResourceMgr.MaxFileDescriptors = fd.GetNumFDs() / 2 - } + numFD := cfg.ResourceMgr.MaxFileDescriptors.WithDefault(int64(fd.GetNumFDs()) / 2) scalingLimitConfig := rcmgr.ScalingLimitConfig{ SystemBaseLimit: rcmgr.BaseLimit{ - Memory: cfg.ResourceMgr.MaxMemory, - FD: cfg.ResourceMgr.MaxFileDescriptors, + Memory: int64(maxMemory), + FD: int(numFD), // By default, we just limit connections on the inbound side. // Note that the limit gets adjusted below if "cfg.ConnMgr.HighWater" is set. @@ -165,7 +167,7 @@ func createDefaultLimitConfig(cfg config.SwarmConfig) rcmgr.LimitConfig { // This doesn't protect us against intentional DoS attacks since an attacker can easily spin up multiple peers. // We specify this limit against unintentional DoS attacks (e.g., a peer has a bug and is sending too much traffic intentionally). // In that case we want to keep that peer's resource consumption contained. - // To keep this simple, we only constrain inbound connections and connections. + // To keep this simple, we only constrain inbound connections and streams. PeerBaseLimit: rcmgr.BaseLimit{ Memory: bigEnough, FD: bigEnough, @@ -193,7 +195,7 @@ func createDefaultLimitConfig(cfg config.SwarmConfig) rcmgr.LimitConfig { // Whatever limits libp2p has specifically tuned for its protocols/services we'll apply. 
libp2p.SetDefaultServiceLimits(&scalingLimitConfig) - defaultLimitConfig := scalingLimitConfig.Scale(cfg.ResourceMgr.MaxMemory, cfg.ResourceMgr.MaxFileDescriptors) + defaultLimitConfig := scalingLimitConfig.Scale(int64(maxMemory), int(numFD)) // If a high water mark is set: if cfg.ConnMgr.Type == "basic" { @@ -202,5 +204,5 @@ func createDefaultLimitConfig(cfg config.SwarmConfig) rcmgr.LimitConfig { log.Info("adjusted default resource manager System.Conns limits to match ConnMgr.HighWater value of %s", cfg.ConnMgr.HighWater) } - return defaultLimitConfig + return defaultLimitConfig, nil } diff --git a/core/node/libp2p/rcmgr_logging.go b/core/node/libp2p/rcmgr_logging.go index 66e22c64a29..34742dd1d7f 100644 --- a/core/node/libp2p/rcmgr_logging.go +++ b/core/node/libp2p/rcmgr_logging.go @@ -23,7 +23,6 @@ type loggingResourceManager struct { mut sync.Mutex limitExceededErrs map[string]int - previousErrors bool } type loggingScope struct { @@ -51,7 +50,6 @@ func (n *loggingResourceManager) start(ctx context.Context) { n.limitExceededErrs = make(map[string]int) for e, count := range errs { - n.previousErrors = true n.logger.Errorf("Resource limits were exceeded %d times with error %q.", count, e) } @@ -59,11 +57,6 @@ func (n *loggingResourceManager) start(ctx context.Context) { n.logger.Errorf("Consider inspecting logs and raising the resource manager limits. Documentation: https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmresourcemgr") } - if len(errs) == 0 && n.previousErrors { - n.previousErrors = false - n.logger.Errorf("Resrouce limits are no longer being exceeded.") - } - n.mut.Unlock() case <-ctx.Done(): return diff --git a/docs/config.md b/docs/config.md index 8ecae4ccf06..af7e7c672e9 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1818,17 +1818,19 @@ Type: `flag` #### `Swarm.ResourceMgr.MaxMemory` -Define the maximum amount of memory used by libp2p. +The maximum amount of memory that the libp2p resource manager will allow. 
-Default: `[TOTAL_MEMORY]/8` -Type: `integer64` +Default: `[TOTAL_SYSTEM_MEMORY]/8` +Type: `optionalBytes` #### `Swarm.ResourceMgr.MaxFileDescriptors` -Define the maximum amount of file descriptors used by libp2p. +Define the maximum number of file descriptors that libp2p can use. -Default `[TOTAL_FILE_DESCRIPTORS]/2` -Type: `integer` +This param is ignored on Windows. + +Default: `[TOTAL_SYSTEM_FILE_DESCRIPTORS]/2` +Type: `optionalInteger` #### `Swarm.ResourceMgr.Limits` diff --git a/test/sharness/t0139-swarm-rcmgr.sh b/test/sharness/t0139-swarm-rcmgr.sh index 66d308ede61..ca63639577e 100755 --- a/test/sharness/t0139-swarm-rcmgr.sh +++ b/test/sharness/t0139-swarm-rcmgr.sh @@ -81,13 +81,13 @@ test_expect_success 'connected: swarm stats all working properly' ' test_expect_success 'ResourceMgr enabled: swarm stats' ' ipfs swarm stats all --enc=json | tee json && jq -e .System.Memory < json && + jq -e .System.FD < json && jq -e .System.Conns < json && jq -e .System.ConnsInbound < json && jq -e .System.ConnsOutbound < json && - jq -e .System.FD < json && + jq -e .System.Streams < json && jq -e .System.StreamsInbound < json && jq -e .System.StreamsOutbound < json && - jq -e .System.Streams < json && jq -e .Transient.Memory < json ' From f9b7a7931fbb785a3143c675394da421f5d6338b Mon Sep 17 00:00:00 2001 From: Antonio Navarro Date: Thu, 10 Nov 2022 10:31:44 +0000 Subject: [PATCH 14/14] Fix getFD on Windows Signed-off-by: Antonio Navarro --- core/node/libp2p/fd/sys_windows.go | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 core/node/libp2p/fd/sys_windows.go diff --git a/core/node/libp2p/fd/sys_windows.go b/core/node/libp2p/fd/sys_windows.go new file mode 100644 index 00000000000..eec17f3883f --- /dev/null +++ b/core/node/libp2p/fd/sys_windows.go @@ -0,0 +1,11 @@ +//go:build windows + +package fd + +import ( + "math" +) + +func GetNumFDs() int { + return math.MaxInt +}