Added prefix match for consistent hash

golang · Oct 4, 2018 · d6d0da0 · d6d0da0
1 parent 24b0969
commit d6d0da0
Show file tree

Hide file tree

Showing 2 changed files with 114 additions and 16 deletions.
diff --git a/consistenthash/consistenthash.go b/consistenthash/consistenthash.go
@@ -19,24 +19,78 @@ package consistenthash
 
 import (
 	"hash/crc32"
+	"math/bits"
 	"sort"
 	"strconv"
 )
 
 type Hash func(data []byte) uint32
 
+const defaultHashExpansion = 6
+
 type Map struct {
-	hash     Hash
+	// Inputs
+
+	// hash is the hash function that will be applied to both added
+	// keys and fetched keys
+	hash Hash
+
+	// replicas is the number of virtual nodes that will be inserted
+	// into the consistent hash ring for each key added
 	replicas int
-	keys     []int // Sorted
-	hashMap  map[int]string
+
+	// prefixTableExpansion is the multiple of virtual nodes that
+	// will be inserted into the internal hash table for O(1) lookups.
+	prefixTableExpansion int
+
+	// Internal data
+
+	// keys is the hash of the virtual nodes, sorted by hash value
+	keys []int // Sorted
+
+	// hashMap maps the hashed keys back to the input strings.
+	// Note that all virtual nodes will map back to the same input
+	// string
+	hashMap map[int]string
+
+	// prefixShift is the number of bits an input hash should
+	// be right-shifted to act as a lookup in the prefixTable
+	prefixShift uint32
+
+	// prefixTable is a map of the most significant bits of
+	// a hash value to output all hashes with that prefix
+	// map to.  If the result is ambiguous (i.e. there is a
+	// hash range split within this prefix) the value will
+	// be blank and we should fall back to a binary search
+	// through keys to find the exact output
+	prefixTable []string
 }
 
+// New returns a blank consistent hash ring that will return
+// the key whose hash comes next after the hash of the input to
+// Map.Get.
+// Increasing the number of replicas will improve the smoothness
+// of the hash ring and reduce the data moved when adding/removing
+// nodes, at the cost of more memory.
 func New(replicas int, fn Hash) *Map {
+	return NewConsistentHash(replicas, defaultHashExpansion, fn)
+}
+
+// NewConsistentHash returns a blank consistent hash ring that will return
+// the key whose hash comes next after the hash of the input to
+// Map.Get.
+// Increasing the number of replicas will improve the smoothness
+// of the hash ring and reduce the data moved when adding/removing
+// nodes.
+// Increasing the tableExpansion will allocate more entries in the
+// internal hash table, reducing the frequency of lg(n) binary
+// searches during calls to the Map.Get method.
+func NewConsistentHash(replicas int, tableExpansion int, fn Hash) *Map {
 	m := &Map{
-		replicas: replicas,
-		hash:     fn,
-		hashMap:  make(map[int]string),
+		replicas:             replicas,
+		hash:                 fn,
+		hashMap:              make(map[int]string),
+		prefixTableExpansion: tableExpansion,
 	}
 	if m.hash == nil {
 		m.hash = crc32.ChecksumIEEE
@@ -59,6 +113,37 @@ func (m *Map) Add(keys ...string) {
 		}
 	}
 	sort.Ints(m.keys)
+
+	// Find minimum number of bits to hold |keys| * prefixTableExpansion
+	prefixBits := uint32(bits.Len32(uint32(len(m.keys) * m.prefixTableExpansion)))
+	m.prefixShift = 32 - prefixBits
+
+	prefixTableSize := 1 << prefixBits
+	m.prefixTable = make([]string, prefixTableSize)
+
+	previousKeyPrefix := -1 // Effectively -Inf
+	currentKeyIdx := 0
+	currentKeyPrefix := m.keys[currentKeyIdx] >> m.prefixShift
+
+	for i := range m.prefixTable {
+		if previousKeyPrefix < i && currentKeyPrefix > i {
+			// All keys with this prefix will map to a single value
+			m.prefixTable[i] = m.hashMap[m.keys[currentKeyIdx]]
+		} else {
+			// Several keys might have the same prefix.  Walk
+			// over them until it changes
+			previousKeyPrefix = currentKeyPrefix
+			for currentKeyPrefix == previousKeyPrefix {
+				currentKeyIdx++
+				if currentKeyIdx < len(m.keys) {
+					currentKeyPrefix = m.keys[currentKeyIdx] >> m.prefixShift
+				} else {
+					currentKeyIdx = 0
+					currentKeyPrefix = prefixTableSize + 1 // Effectively +Inf
+				}
+			}
+		}
+	}
 }
 
 // Gets the closest item in the hash to the provided key.
@@ -69,6 +154,13 @@ func (m *Map) Get(key string) string {
 
 	hash := int(m.hash([]byte(key)))
 
+	// Look for the hash prefix in the prefix table
+	prefixSlot := hash >> m.prefixShift
+	tableResult := m.prefixTable[prefixSlot]
+	if len(tableResult) > 0 {
+		return tableResult
+	}
+
 	// Binary search for appropriate replica.
 	idx := sort.Search(len(m.keys), func(i int) bool { return m.keys[i] >= hash })
 

diff --git a/consistenthash/consistenthash_test.go b/consistenthash/consistenthash_test.go
@@ -26,7 +26,7 @@ func TestHashing(t *testing.T) {
 
 	// Override the hash function to return easier to reason about values. Assumes
 	// the keys can be converted to an integer.
-	hash := New(3, func(key []byte) uint32 {
+	hash := NewConsistentHash(3, 6, func(key []byte) uint32 {
 		i, err := strconv.Atoi(string(key))
 		if err != nil {
 			panic(err)
@@ -66,8 +66,8 @@ func TestHashing(t *testing.T) {
 }
 
 func TestConsistency(t *testing.T) {
-	hash1 := New(1, nil)
-	hash2 := New(1, nil)
+	hash1 := NewConsistentHash(1, 6, nil)
+	hash2 := NewConsistentHash(1, 6, nil)
 
 	hash1.Add("Bill", "Bob", "Bonny")
 	hash2.Add("Bob", "Bonny", "Bill")
@@ -86,25 +86,31 @@ func TestConsistency(t *testing.T) {
 
 }
 
-func BenchmarkGet8(b *testing.B)   { benchmarkGet(b, 8) }
-func BenchmarkGet32(b *testing.B)  { benchmarkGet(b, 32) }
-func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128) }
-func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512) }
+func BenchmarkGet8(b *testing.B)   { benchmarkGet(b, 8, 6) }
+func BenchmarkGet32(b *testing.B)  { benchmarkGet(b, 32, 6) }
+func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128, 6) }
+func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512, 6) }
 
-func benchmarkGet(b *testing.B, shards int) {
+func benchmarkGet(b *testing.B, shards int, expansion int) {
 
-	hash := New(50, nil)
+	hash := NewConsistentHash(50, expansion, nil)
 
 	var buckets []string
 	for i := 0; i < shards; i++ {
 		buckets = append(buckets, fmt.Sprintf("shard-%d", i))
 	}
 
+	testStringCount := shards
+	var testStrings []string
+	for i := 0; i < testStringCount; i++ {
+		testStrings = append(testStrings, fmt.Sprintf("%d", i))
+	}
+
 	hash.Add(buckets...)
 
 	b.ResetTimer()
 
 	for i := 0; i < b.N; i++ {
-		hash.Get(buckets[i&(shards-1)])
+		hash.Get(testStrings[i&(testStringCount-1)])
 	}
 }