Skip to content

Commit

Permalink
Added prefix match for consistent hash
Browse files Browse the repository at this point in the history
  • Loading branch information
bobhansen committed Oct 4, 2018
1 parent 24b0969 commit d6d0da0
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 16 deletions.
104 changes: 98 additions & 6 deletions consistenthash/consistenthash.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,78 @@ package consistenthash

import (
"hash/crc32"
"math/bits"
"sort"
"strconv"
)

type Hash func(data []byte) uint32

const defaultHashExpansion = 6

type Map struct {
hash Hash
// Inputs

// hash is the hash function that will be applied to both added
// keys and fetched keys
hash Hash

// replicas is the number of virtual nodes that will be inserted
// into the consistent hash ring for each key added
replicas int
keys []int // Sorted
hashMap map[int]string

// prefixTableExpansion is the multiple of virtual nodes that
// will be inserted into the internal hash table for O(1) lookups.
prefixTableExpansion int

// Internal data

// keys is the hash of the virtual nodes, sorted by hash value
keys []int // Sorted

// hashMap maps the hashed keys back to the input strings.
// Note that all virtual nodes will map back to the same input
// string
hashMap map[int]string

// prefixShift is the number of bits an input hash should
// be right-shifted to act as a lookup in the prefixTable
prefixShift uint32

// prefixTable is a map of the most significant bits of
// a hash value to output all hashes with that prefix
// map to. If the result is ambiguous (i.e. there is a
// hash range split within this prefix) the value will
// be blank and we should fall back to a binary search
// through keys to find the exact output
prefixTable []string
}

// New returns a blank consistent hash ring that will return
// the key whose hash comes next after the hash of the input to
// Map.Get.
// Increasing the number of replicas will improve the smoothness
// of the hash ring and reduce the data moved when adding/removing
// nodes, at the cost of more memory.
func New(replicas int, fn Hash) *Map {
return NewConsistentHash(replicas, defaultHashExpansion, fn)
}

// NewConsistentHash returns a blank consistent hash ring that will return
// the key whose hash comes next after the hash of the input to
// Map.Get.
// Increasing the number of replicas will improve the smoothness
// of the hash ring and reduce the data moved when adding/removing
// nodes.
// Increasing the tableExpansion will allocate more entries in the
// internal hash table, reducing the frequency of lg(n) binary
// searches during calls to the Map.Get method.
func NewConsistentHash(replicas int, tableExpansion int, fn Hash) *Map {
m := &Map{
replicas: replicas,
hash: fn,
hashMap: make(map[int]string),
replicas: replicas,
hash: fn,
hashMap: make(map[int]string),
prefixTableExpansion: tableExpansion,
}
if m.hash == nil {
m.hash = crc32.ChecksumIEEE
Expand All @@ -59,6 +113,37 @@ func (m *Map) Add(keys ...string) {
}
}
sort.Ints(m.keys)

// Find minimum number of bits to hold |keys| * prefixTableExpansion
prefixBits := uint32(bits.Len32(uint32(len(m.keys) * m.prefixTableExpansion)))
m.prefixShift = 32 - prefixBits

prefixTableSize := 1 << prefixBits
m.prefixTable = make([]string, prefixTableSize)

previousKeyPrefix := -1 // Effectively -Inf
currentKeyIdx := 0
currentKeyPrefix := m.keys[currentKeyIdx] >> m.prefixShift

for i := range m.prefixTable {
if previousKeyPrefix < i && currentKeyPrefix > i {
// All keys with this prefix will map to a single value
m.prefixTable[i] = m.hashMap[m.keys[currentKeyIdx]]
} else {
// Several keys might have the same prefix. Walk
// over them until it changes
previousKeyPrefix = currentKeyPrefix
for currentKeyPrefix == previousKeyPrefix {
currentKeyIdx++
if currentKeyIdx < len(m.keys) {
currentKeyPrefix = m.keys[currentKeyIdx] >> m.prefixShift
} else {
currentKeyIdx = 0
currentKeyPrefix = prefixTableSize + 1 // Effectively +Inf
}
}
}
}
}

// Gets the closest item in the hash to the provided key.
Expand All @@ -69,6 +154,13 @@ func (m *Map) Get(key string) string {

hash := int(m.hash([]byte(key)))

// Look for the hash prefix in the prefix table
prefixSlot := hash >> m.prefixShift
tableResult := m.prefixTable[prefixSlot]
if len(tableResult) > 0 {
return tableResult
}

// Binary search for appropriate replica.
idx := sort.Search(len(m.keys), func(i int) bool { return m.keys[i] >= hash })

Expand Down
26 changes: 16 additions & 10 deletions consistenthash/consistenthash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func TestHashing(t *testing.T) {

// Override the hash function to return easier to reason about values. Assumes
// the keys can be converted to an integer.
hash := New(3, func(key []byte) uint32 {
hash := NewConsistentHash(3, 6, func(key []byte) uint32 {
i, err := strconv.Atoi(string(key))
if err != nil {
panic(err)
Expand Down Expand Up @@ -66,8 +66,8 @@ func TestHashing(t *testing.T) {
}

func TestConsistency(t *testing.T) {
hash1 := New(1, nil)
hash2 := New(1, nil)
hash1 := NewConsistentHash(1, 6, nil)
hash2 := NewConsistentHash(1, 6, nil)

hash1.Add("Bill", "Bob", "Bonny")
hash2.Add("Bob", "Bonny", "Bill")
Expand All @@ -86,25 +86,31 @@ func TestConsistency(t *testing.T) {

}

func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8) }
func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32) }
func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128) }
func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512) }
func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8, 6) }
func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32, 6) }
func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128, 6) }
func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512, 6) }

func benchmarkGet(b *testing.B, shards int) {
func benchmarkGet(b *testing.B, shards int, expansion int) {

hash := New(50, nil)
hash := NewConsistentHash(50, expansion, nil)

var buckets []string
for i := 0; i < shards; i++ {
buckets = append(buckets, fmt.Sprintf("shard-%d", i))
}

testStringCount := shards
var testStrings []string
for i := 0; i < testStringCount; i++ {
testStrings = append(testStrings, fmt.Sprintf("%d", i))
}

hash.Add(buckets...)

b.ResetTimer()

for i := 0; i < b.N; i++ {
hash.Get(buckets[i&(shards-1)])
hash.Get(testStrings[i&(testStringCount-1)])
}
}

0 comments on commit d6d0da0

Please sign in to comment.