-
Notifications
You must be signed in to change notification settings - Fork 35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add metrics reporting #37
base: master
Are you sure you want to change the base?
Changes from 2 commits
1d8019d
bdb78a4
443ab7b
b68529c
9e2000b
487d482
cf1e083
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
module github.com/libp2p/go-libp2p-kbucket | ||
|
||
require ( | ||
github.com/btcsuite/btcutil v0.0.0-20190207003914-4c204d697803 | ||
github.com/ipfs/go-ipfs-util v0.0.1 | ||
github.com/ipfs/go-log v0.0.1 | ||
github.com/libp2p/go-libp2p-core v0.0.1 | ||
github.com/libp2p/go-libp2p-peerstore v0.1.0 | ||
github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16 | ||
go.opencensus.io v0.22.0 | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
package metrics | ||
|
||
import (
	"context"
	"strconv"
	"time"

	"github.com/btcsuite/btcutil/base58"
	"go.opencensus.io/stats"
	"go.opencensus.io/stats/view"
	"go.opencensus.io/tag"
)
|
||
// Keys | ||
var ( | ||
keyLocalId, _ = tag.NewKey("local_dht_id") | ||
keyBucketIndex, _ = tag.NewKey("bucket_index") | ||
) | ||
|
||
// measurePrefix is the namespace shared by every measure name below.
const measurePrefix = "libp2p.io/dht/k-bucket/"

// Names of the measures reported by this package. They are exported so that
// callers can refer to a measure by name; the entries of the `DefaultViews`
// map are keyed by these names.
const (
	MeasureBucketsFull              = measurePrefix + "full_buckets"
	MeasureBucketsNonEmpty          = measurePrefix + "non_empty_buckets"
	MeasureBucketUtilization        = measurePrefix + "utilization"
	MeasurePeerLatency              = measurePrefix + "peer_latency"
	MeasurePeersAdded               = measurePrefix + "peers_added"
	MeasurePeersRejectedHighLatency = measurePrefix + "peers_rejected_high_latency"
	MeasurePeersRejectedNoCapacity  = measurePrefix + "peers_rejected_capacity"
	MeasurePeersRefreshed           = measurePrefix + "peers_refreshed"
	MeasurePeersRemoved             = measurePrefix + "peers_removed"
)
|
||
// Measures are exported so that consumers can create their own views if the `DefaultViews` | ||
// aren't sufficient. However, they should be updated using the functions below to avoid | ||
// leaking OpenCensus cruft throughout the rest of the code. | ||
var ( | ||
KBucketsFull = stats.Int64(MeasureBucketsFull, | ||
"Number of k-buckets that are at capacity (have k entries).", stats.UnitDimensionless) | ||
KBucketsNonEmpty = stats.Int64(MeasureBucketsNonEmpty, | ||
"Number of k-buckets with at least one entry.", stats.UnitDimensionless) | ||
KBucketUtilization = stats.Int64(MeasureBucketUtilization, | ||
"Number of entries per k-bucket.", stats.UnitDimensionless) | ||
KBucketPeerLatency = stats.Float64(MeasurePeerLatency, | ||
"Recorded latency measurements for all peers added to each bucket, per bucket.", stats.UnitMilliseconds) | ||
KBucketPeersAdded = stats.Int64(MeasurePeersAdded, | ||
"Number of peers added to each k-bucket (cumulative). Note that peers can be counted twice if added and removed.", stats.UnitDimensionless) | ||
KBucketPeersRejectedHighLatency = stats.Int64(MeasurePeersRejectedHighLatency, | ||
"Number of peers rejected from k-buckets due to high latency.", stats.UnitDimensionless) | ||
KBucketPeersRejectedNoCapacity = stats.Int64(MeasurePeersRejectedNoCapacity, | ||
"Number of peers rejected from the routing table because their k-bucket was full.", stats.UnitDimensionless) | ||
KBucketPeersRefreshed = stats.Int64(MeasurePeersRefreshed, | ||
"Number of peers moved to the front of a k-bucket that they were already in.", stats.UnitDimensionless) | ||
KBucketPeersRemoved = stats.Int64(MeasurePeersRemoved, | ||
"Number of peers removed from each k-bucket (cumulative). "+ | ||
"Peers may be counted twice if added and removed.", stats.UnitDimensionless) | ||
) | ||
|
||
// LocalContext returns `ctx` tagged with the local dht `id` for metrics reporting. | ||
func LocalContext(ctx context.Context, id []byte) context.Context { | ||
pretty := base58.Encode(id) | ||
ctx, _ = tag.New(ctx, tag.Upsert(keyLocalId, pretty)) | ||
return ctx | ||
} | ||
|
||
// RecordBucketsFull records the current number of buckets `n` that are at capacity. | ||
func RecordBucketsFull(ctx context.Context, n int) { | ||
stats.Record(ctx, KBucketsFull.M(int64(n))) | ||
} | ||
|
||
// RecordBucketsNonEmpty records the current number of buckets `n` that have at least one entry. | ||
func RecordBucketsNonEmpty(ctx context.Context, n int) { | ||
stats.Record(ctx, KBucketsNonEmpty.M(int64(n))) | ||
} | ||
|
||
// recordWithBucketIndex is a helper func that applies a tag to the measurements `ms` | ||
// indicating the index of the bucket to which the measurement applies. | ||
func recordWithBucketIndex(ctx context.Context, bucketIndex int, ms ...stats.Measurement) { | ||
_ = stats.RecordWithTags(ctx, | ||
[]tag.Mutator{tag.Upsert(keyBucketIndex, string(bucketIndex))}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
ms..., | ||
) | ||
} | ||
|
||
// RecordBucketUtilization records the current number of entries `n` for | ||
// the given `bucketIndex`. | ||
func RecordBucketUtilization(ctx context.Context, bucketIndex int, n int) { | ||
recordWithBucketIndex(ctx, bucketIndex, KBucketUtilization.M(int64(n))) | ||
} | ||
|
||
// RecordPeerAdded records that a peer was added to the bucket with index `bucketIndex`. | ||
// It also records the `measuredLatency` of the peer at the time they were added. | ||
func RecordPeerAdded(ctx context.Context, bucketIndex int, measuredLatency time.Duration) { | ||
recordWithBucketIndex(ctx, bucketIndex, | ||
KBucketPeersAdded.M(1), | ||
KBucketPeerLatency.M(float64(measuredLatency/time.Millisecond))) | ||
} | ||
|
||
// RecordPeerRejectedHighLatency records that a peer was rejected from the bucket with | ||
// index `bucketIndex` because their measured connection latency was too high. | ||
func RecordPeerRejectedHighLatency(ctx context.Context, bucketIndex int) { | ||
recordWithBucketIndex(ctx, bucketIndex, KBucketPeersRejectedHighLatency.M(1)) | ||
} | ||
|
||
// RecordPeerRejectedNoCapacity records that a peer was rejected from the bucket with | ||
// index `bucketIndex` because the bucket was full, and no members were eligible for eviction. | ||
func RecordPeerRejectedNoCapacity(ctx context.Context, bucketIndex int) { | ||
recordWithBucketIndex(ctx, bucketIndex, KBucketPeersRejectedNoCapacity.M(1)) | ||
} | ||
|
||
// RecordPeerRefreshed records that a peer in the bucket with index `bucketIndex` | ||
// had its "last seen" status updated, moving it to the head of its bucket. | ||
func RecordPeerRefreshed(ctx context.Context, bucketIndex int) { | ||
recordWithBucketIndex(ctx, bucketIndex, KBucketPeersRefreshed.M(1)) | ||
} | ||
|
||
// RecordPeerRemoved records that a peer was removed from the bucket with | ||
// index `bucketIndex`. | ||
func RecordPeerRemoved(ctx context.Context, bucketIndex int) { | ||
recordWithBucketIndex(ctx, bucketIndex, KBucketPeersRemoved.M(1)) | ||
} | ||
|
||
var DefaultViews = map[string]*view.View{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Initialisation of views should be separate from the construction of the default views. Also |
||
MeasureBucketsFull: { | ||
Measure: KBucketsFull, | ||
TagKeys: []tag.Key{keyLocalId}, | ||
Aggregation: view.LastValue(), | ||
}, | ||
|
||
MeasureBucketsNonEmpty: { | ||
Measure: KBucketsNonEmpty, | ||
TagKeys: []tag.Key{keyLocalId}, | ||
Aggregation: view.LastValue(), | ||
}, | ||
|
||
MeasureBucketUtilization: { | ||
Measure: KBucketUtilization, | ||
TagKeys: []tag.Key{keyLocalId, keyBucketIndex}, | ||
Aggregation: view.LastValue(), | ||
}, | ||
|
||
MeasurePeersAdded: { | ||
Measure: KBucketPeersAdded, | ||
TagKeys: []tag.Key{keyLocalId, keyBucketIndex}, | ||
Aggregation: view.Count(), | ||
}, | ||
|
||
MeasurePeersRejectedHighLatency: { | ||
Measure: KBucketPeersRejectedHighLatency, | ||
TagKeys: []tag.Key{keyLocalId, keyBucketIndex}, | ||
Aggregation: view.Count(), | ||
}, | ||
|
||
MeasurePeersRejectedNoCapacity: { | ||
Measure: KBucketPeersRejectedNoCapacity, | ||
TagKeys: []tag.Key{keyLocalId, keyBucketIndex}, | ||
Aggregation: view.Count(), | ||
}, | ||
|
||
MeasurePeersRemoved: { | ||
Measure: KBucketPeersRemoved, | ||
TagKeys: []tag.Key{keyLocalId, keyBucketIndex}, | ||
Aggregation: view.Count(), | ||
}, | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,10 @@ | |
package kbucket | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
"github.com/libp2p/go-libp2p-kbucket/metrics" | ||
"sync" | ||
"time" | ||
|
||
|
@@ -57,8 +59,10 @@ func NewRoutingTable(bucketsize int, localID ID, latency time.Duration, m peerst | |
return rt | ||
} | ||
|
||
// Update adds or moves the given peer to the front of its respective bucket | ||
func (rt *RoutingTable) Update(p peer.ID) (evicted peer.ID, err error) { | ||
// UpdateAndRecordMetrics adds or moves the given peer to the front of its respective bucket, while recording | ||
// metrics about bucket capacities and peer additions and removals. | ||
func (rt *RoutingTable) UpdateAndRecordMetrics(ctx context.Context, p peer.ID) (evicted peer.ID, err error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recording metrics shouldn't be considered 'extra' functionality, it is just a requirement of running a production system, if the rename is because of the change to the parameters, it is a common pattern to append There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I didn't like the new name and am glad there's an idiom I can use 😄 |
||
ctx = metrics.LocalContext(ctx, rt.local) | ||
peerID := ConvertPeerID(p) | ||
cpl := CommonPrefixLen(peerID, rt.local) | ||
|
||
|
@@ -69,23 +73,44 @@ func (rt *RoutingTable) Update(p peer.ID) (evicted peer.ID, err error) { | |
bucketID = len(rt.Buckets) - 1 | ||
} | ||
|
||
var full = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. something as intensive as this should maybe be enabled with a flag. could just be a module scope |
||
var nonEmpty = 0 | ||
for i, buck := range rt.Buckets { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally, we could get a snapshot of all the buckets straight away, but it is really expensive. I think a better way to approach this would be to build up the view of all buckets one bucket at a time. So whichever bucket is chosen for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea - the measures for the # of full and non-empty buckets are redundant with the utilization measure anyway. I'll rewrite this to just record utilization for the buckets we actually visit in the Update method and remove the loop. |
||
n := buck.Len() | ||
metrics.RecordBucketUtilization(ctx, i, n) | ||
if n == 0 { | ||
continue | ||
} | ||
nonEmpty += 1 | ||
if n >= rt.bucketsize { | ||
full += 1 | ||
} | ||
} | ||
metrics.RecordBucketsNonEmpty(ctx, nonEmpty) | ||
metrics.RecordBucketsFull(ctx, full) | ||
|
||
bucket := rt.Buckets[bucketID] | ||
if bucket.Has(p) { | ||
// If the peer is already in the table, move it to the front. | ||
// This signifies that it is "more active" and the less active nodes | ||
// will as a result tend towards the back of the list | ||
bucket.MoveToFront(p) | ||
metrics.RecordPeerRefreshed(ctx, bucketID) | ||
return "", nil | ||
} | ||
|
||
if rt.metrics.LatencyEWMA(p) > rt.maxLatency { | ||
peerLatency := rt.metrics.LatencyEWMA(p) | ||
|
||
if peerLatency > rt.maxLatency { | ||
// Connection doesn't meet requirements, skip! | ||
metrics.RecordPeerRejectedHighLatency(ctx, bucketID) | ||
return "", ErrPeerRejectedHighLatency | ||
} | ||
|
||
// We have enough space in the bucket (whether spawned or grouped). | ||
if bucket.Len() < rt.bucketsize { | ||
bucket.PushFront(p) | ||
metrics.RecordPeerAdded(ctx, bucketID, peerLatency) | ||
rt.PeerAdded(p) | ||
return "", nil | ||
} | ||
|
@@ -101,19 +126,33 @@ func (rt *RoutingTable) Update(p peer.ID) (evicted peer.ID, err error) { | |
bucket = rt.Buckets[bucketID] | ||
if bucket.Len() >= rt.bucketsize { | ||
// if after all the unfolding, we're unable to find room for this peer, scrap it. | ||
metrics.RecordPeerRejectedNoCapacity(ctx, bucketID) | ||
return "", ErrPeerRejectedNoCapacity | ||
} | ||
bucket.PushFront(p) | ||
metrics.RecordPeerAdded(ctx, bucketID, peerLatency) | ||
rt.PeerAdded(p) | ||
return "", nil | ||
} | ||
|
||
metrics.RecordPeerRejectedNoCapacity(ctx, bucketID) | ||
return "", ErrPeerRejectedNoCapacity | ||
} | ||
|
||
// Update adds or moves the given peer to the front of its respective bucket | ||
func (rt *RoutingTable) Update(p peer.ID) (evicted peer.ID, err error) { | ||
return rt.UpdateAndRecordMetrics(context.Background(), p) | ||
} | ||
|
||
// Remove deletes a peer from the routing table. This is to be used | ||
// when we are sure a node has disconnected completely. | ||
func (rt *RoutingTable) Remove(p peer.ID) { | ||
rt.RemoveAndRecordMetrics(context.Background(), p) | ||
} | ||
|
||
// RemoveAndRecordMetrics deletes a peer from the routing table, updating metrics on removal. | ||
// This is to be used when we are sure a node has disconnected completely. | ||
lanzafame marked this conversation as resolved.
Show resolved
Hide resolved
|
||
func (rt *RoutingTable) RemoveAndRecordMetrics(ctx context.Context, p peer.ID) { | ||
peerID := ConvertPeerID(p) | ||
cpl := CommonPrefixLen(peerID, rt.local) | ||
|
||
|
@@ -127,6 +166,7 @@ func (rt *RoutingTable) Remove(p peer.ID) { | |
|
||
bucket := rt.Buckets[bucketID] | ||
if bucket.Remove(p) { | ||
metrics.RecordPeerRemoved(ctx, bucketID) | ||
rt.PeerRemoved(p) | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nitpick: If you are multi-lining a function call, put each parameter on a new line and not a combination. In this particular case, the third parameter is lost at the end of the line.