Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fast Keccak cache (wait-free and lockless) #7336

Merged
merged 53 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
280097e
More comments
Scooletz Aug 16, 2024
7ecd7a5
comments
Scooletz Aug 16, 2024
8dce0c7
updated
Scooletz Aug 16, 2024
dec33ca
EVM uses the cached keccak
Scooletz Aug 16, 2024
f5b756c
state tree
Scooletz Aug 16, 2024
df510fd
more tests
Scooletz Aug 16, 2024
154037e
special cases
Scooletz Aug 16, 2024
24a595b
Release with Volatile.Write
Scooletz Aug 16, 2024
790f86d
StorageTree added
Scooletz Aug 16, 2024
2593734
comments
Scooletz Aug 16, 2024
805e28c
one less CAS
Scooletz Aug 16, 2024
b01d503
more go-tos
Scooletz Aug 16, 2024
83e8489
Merge branch 'master' into keccak-cache
benaadams Aug 16, 2024
0118c1d
one less branch in hash
Scooletz Aug 16, 2024
af55085
Merge branch 'keccak-cache' of https://github.com/NethermindEth/nethe…
Scooletz Aug 16, 2024
21d7714
Merge branch 'master' into keccak-cache
benaadams Aug 16, 2024
2e74b76
Start with per instance random
benaadams Aug 16, 2024
a29dcec
Include length in hash seed
benaadams Aug 16, 2024
e654d13
Improve comment
benaadams Aug 16, 2024
c88d931
Move stackalloc out of common path and inline ComputeKey to it
benaadams Aug 17, 2024
6fcfb9a
Unify hashing
benaadams Aug 17, 2024
381511e
Use full hash
benaadams Aug 18, 2024
9e2ea27
Faster StorageCell equality
benaadams Aug 18, 2024
c3996a2
Less copy
benaadams Aug 18, 2024
669cec4
Merge branch 'master' into keccak-cache
benaadams Aug 18, 2024
d1483ce
Even less copy
benaadams Aug 18, 2024
2070cc2
Doesn't need to return
benaadams Aug 19, 2024
28e0fa4
Update alignment comments
benaadams Aug 19, 2024
89bd655
lol; don't do extra work
benaadams Aug 19, 2024
42ef8f6
Faster compares
benaadams Aug 19, 2024
773ef2a
Move other HasCodes to FastHash
benaadams Aug 19, 2024
226be3f
Special case 32 and 20 bytes hashes
benaadams Aug 19, 2024
7d5fba5
Word align vector compare
benaadams Aug 19, 2024
7184d26
constants
Scooletz Aug 19, 2024
795b4c9
stack reduced by 8 and one less comparison on read
Scooletz Aug 19, 2024
c19edb3
Use full entropy of HashCode for comparision
benaadams Aug 19, 2024
1f8b94f
Revert "Use full entropy of HashCode for comparision"
benaadams Aug 19, 2024
928929e
Tweaks
benaadams Aug 19, 2024
e32f995
comments
Scooletz Aug 19, 2024
6938aab
Align 32
benaadams Aug 19, 2024
545384f
Merge branch 'keccak-cache' of https://github.com/NethermindEth/nethe…
benaadams Aug 19, 2024
472ca26
smaller entry, bigger cache
Scooletz Aug 20, 2024
7d1bba6
Missed one
benaadams Aug 20, 2024
b8a6f7f
Merge branch 'master' into keccak-cache
benaadams Aug 20, 2024
9e8b635
Merge branch 'master' into keccak-cache
benaadams Aug 23, 2024
a536245
Merge branch 'master' into keccak-cache
benaadams Aug 31, 2024
50c1e8f
Faster FastHash
benaadams Aug 31, 2024
fc52fcc
alignment handled with if
Scooletz Sep 2, 2024
c11cc94
Merge branch 'master' into keccak-cache
benaadams Sep 11, 2024
ce44b4c
Merge branch 'master' into keccak-cache
benaadams Sep 11, 2024
a2a0155
Merge branch 'master' into keccak-cache
benaadams Sep 19, 2024
7363f2c
Missed one fastHash
benaadams Sep 19, 2024
70f5995
Add memory pressure
benaadams Sep 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions src/Nethermind/Nethermind.Core.Test/KeccakCacheTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// SPDX-FileCopyrightText: 2022 Demerzel Solutions Limited
// SPDX-License-Identifier: LGPL-3.0-only

using System;
using System.Buffers.Binary;
using System.Linq;
using System.Threading.Tasks;
using FluentAssertions;
using Nethermind.Core.Crypto;
using Nethermind.Core.Extensions;
using NUnit.Framework;

namespace Nethermind.Core.Test
{
/// <summary>
/// Verifies that <see cref="KeccakCache"/> always returns the same value as <see cref="ValueKeccak"/>,
/// for special-cased inputs, repeated lookups, colliding buckets and concurrent access.
/// </summary>
[TestFixture]
public class KeccakCacheTests
{
    /// <summary>
    /// Hashing the same input repeatedly must keep returning the reference value.
    /// </summary>
    [Test]
    public void Multiple()
    {
        const int spins = 10;

        // 31 bytes: a misaligned length.
        byte[] input = new byte[31];
        new Random(13).NextBytes(input);

        ValueHash256 expected = ValueKeccak.Compute(input);

        int remaining = spins;
        while (remaining-- > 0)
        {
            ValueHash256 cached = KeccakCache.Compute(input);
            cached.Equals(expected).Should().BeTrue();
        }
    }

    /// <summary>
    /// The empty input is special-cased by the cache and must match the reference hash.
    /// </summary>
    [Test]
    public void Empty()
    {
        ReadOnlySpan<byte> nothing = default;
        KeccakCache.Compute(nothing).Should().Be(ValueKeccak.Compute(nothing));
    }

    /// <summary>
    /// A 192-byte input must still match the reference hash.
    /// </summary>
    [Test]
    public void Very_long()
    {
        byte[] zeros = new byte[192];
        ReadOnlySpan<byte> longInput = zeros;
        KeccakCache.Compute(longInput).Should().Be(ValueKeccak.Compute(longInput));
    }

    /// <summary>
    /// Inputs that map to the same bucket must each get their own correct hash,
    /// first sequentially and then when hammered from several threads at once.
    /// </summary>
    [Test]
    public void Collision()
    {
        string[] collidingHex =
        {
            "f8ae910727f29363002d948385ff15bc6a9bacbef13bc2afc0aa8d02749668",
            "baec2065df3da176cee21714b7bfb00d0c57f37a21daf2b2d4056f67270290",
            "924cc47a10ad801c74a491b19492563d2351b285ff1679e5b5264e57b13bbb",
            "4fb39f7800b3a43e4e722dc6fed03b126e0125d7ca713b0558564a29903ea9",
        };

        byte[][] inputs = Array.ConvertAll(collidingHex, hex => Bytes.FromHexString(hex));
        ValueHash256[] expectedHashes = Array.ConvertAll(inputs, bytes => ValueKeccak.Compute(bytes));

        uint sharedBucket = KeccakCache.GetBucket(inputs[0]);

        // Every other input must land in the bucket of the first one and still hash correctly.
        for (int position = 1; position < collidingHex.Length; position++)
        {
            byte[] candidate = inputs[position];
            sharedBucket.Should().Be(KeccakCache.GetBucket(candidate));
            KeccakCache.Compute(candidate).Should().Be(expectedHashes[position]);
        }

        // Contend on the single shared bucket from multiple threads.
        Parallel.ForEach(inputs, (bytes, loopState, position) =>
        {
            ValueHash256 expected = expectedHashes[position];

            int iteration = 0;
            while (iteration < 100_000)
            {
                KeccakCache.Compute(bytes).Should().Be(expected);
                iteration++;
            }
        });
    }

    /// <summary>
    /// Hashes as many distinct 4-byte inputs as there are entries in the cache.
    /// </summary>
    [Test]
    public void Spin_through_all()
    {
        Span<byte> buffer = stackalloc byte[sizeof(int)];

        int value = 0;
        while (value < KeccakCache.Count)
        {
            BinaryPrimitives.WriteInt32LittleEndian(buffer, value);
            KeccakCache.Compute(buffer).Should().Be(ValueKeccak.Compute(buffer));
            value++;
        }
    }
}
}
208 changes: 208 additions & 0 deletions src/Nethermind/Nethermind.Core/Crypto/KeccakCache.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
// SPDX-FileCopyrightText: 2024 Demerzel Solutions Limited
// SPDX-License-Identifier: LGPL-3.0-only

using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;

namespace Nethermind.Core.Crypto;

/// <summary>
/// A minimalistic one-way set associative (direct-mapped) cache for Keccak values.
///
/// It allocates only 8MB of native memory to store 64k entries of 128 bytes each.
/// No misaligned reads. Entries are aligned to their own size, which keeps them aligned to cache lines and keeps their fields on natural boundaries, so reads are not torn.
/// Requires a single CAS to lock an entry and <see cref="Volatile.Write(ref int,int)"/> to unlock it.
/// On lock failure, it does not wait; it just moves on with execution.
/// On a likely hit, the whole 128-byte entry is copied to the stack so that the lock can be released as soon as possible; the key comparison then runs on the copy.
/// </summary>
public static unsafe class KeccakCache
{
/// <summary>
/// Random value drawn once when the type is initialized; FastHash uses it as its starting seed.
/// </summary>
private static readonly uint s_instanceRandom = (uint)System.Security.Cryptography.RandomNumberGenerator.GetInt32(int.MinValue, int.MaxValue);

/// <summary>
/// Count is defined as a +1 over bucket mask. In the future, just change the mask as the main parameter.
/// </summary>
public const int Count = BucketMask + 1;

/// <summary>
/// Mask applied to the fast hash to select the bucket (the low 16 bits).
/// </summary>
private const int BucketMask = 0x0000_FFFF;

/// <summary>
/// Complement of <see cref="BucketMask"/>: the high bits of the fast hash that are kept in an entry's HashAndLength.
/// </summary>
private const uint HashMask = unchecked((uint)~BucketMask);

/// <summary>
/// Start of the natively allocated table of <see cref="Count"/> entries, assigned in the static constructor.
/// </summary>
private static readonly Entry* Memory;

/// <summary>
/// Allocates and zeroes the native table that backs the cache, and reports its size to the GC.
/// </summary>
static KeccakCache()
{
    const UIntPtr size = Count * Entry.Size;

    // Aligned, so that no torn reads if fields of Entry are properly aligned.
    Memory = (Entry*)NativeMemory.AlignedAlloc(size, Entry.Size);
    NativeMemory.Clear(Memory, size);

    // The table is allocated outside the managed heap, so the GC cannot see it.
    // Report it, so that collection scheduling accounts for the unmanaged allocation.
    // The table is never freed in this class, hence there is no matching RemoveMemoryPressure.
    GC.AddMemoryPressure((long)size);
}

/// <summary>
/// Computes the Keccak of <paramref name="input"/>, returning a previously memoized value when the
/// bucket selected by the fast hash holds exactly the same input.
/// </summary>
/// <param name="input">
/// The bytes to hash. An empty input and inputs longer than <c>Entry.MaxPayloadLength</c> bypass the cache.
/// </param>
/// <returns>The Keccak hash of <paramref name="input"/>.</returns>
[SkipLocalsInit]
public static ValueHash256 Compute(ReadOnlySpan<byte> input)
{
Unsafe.SkipInit(out ValueHash256 hash);

// Special cases first
if (input.Length == 0)
{
hash = ValueKeccak.OfAnEmptyString;
goto Return;
}

// Too long to fit into the payload of an entry: compute directly, nothing is cached.
if (input.Length > Entry.MaxPayloadLength)
{
hash = ValueKeccak.Compute(input);
goto Return;
}

// The low bits of the fast hash select the bucket.
var fast = FastHash(input);
var index = fast & BucketMask;

Debug.Assert(index < Count);

// The high bits of the fast hash combined with the input length form the tag stored in the entry.
uint hashAndLength = (fast & HashMask) | (ushort)input.Length;

ref Entry e = ref Unsafe.Add(ref Unsafe.AsRef<Entry>(Memory), index);

// Read aligned, volatile, won't be torn, check with computed
if (Volatile.Read(ref e.HashAndLength) == hashAndLength)
{
// There's a possibility of a hit, try lock.
// If the lock is already taken, no waiting happens: execution falls through to Compute below.
if (Interlocked.CompareExchange(ref e.Lock, Entry.Locked, Entry.Unlocked) == Entry.Unlocked)
{
if (e.HashAndLength != hashAndLength)
{
// The value has been changed between reading and taking a lock.
// Release the lock and compute.
Volatile.Write(ref e.Lock, Entry.Unlocked);
goto Compute;
}

// Local copy of 128 bytes, to release the lock as soon as possible and make a key comparison without holding it.
Entry copy = e;

// Release the lock
Volatile.Write(ref e.Lock, Entry.Unlocked);

// Lengths are equal, the input length can be used without any additional operation.
if (MemoryMarshal.CreateReadOnlySpan(ref copy.Payload, input.Length).SequenceEqual(input))
{
hash = copy.Value;
goto Return;
}

// Same tag but a different payload: fall through and compute.
}
}

Compute:
hash = ValueKeccak.Compute(input);

// Try lock and memoize
// If the lock is taken by another thread, the freshly computed hash is returned without being stored.
if (Interlocked.CompareExchange(ref e.Lock, Entry.Locked, Entry.Unlocked) == Entry.Unlocked)
{
e.HashAndLength = hashAndLength;
e.Value = hash;

input.CopyTo(MemoryMarshal.CreateSpan(ref e.Payload, input.Length));

// Release the lock
Volatile.Write(ref e.Lock, Entry.Unlocked);
}

Return:
return hash;
}

/// <summary>
/// Exposes, for tests, the index of the bucket that the given input maps to.
/// </summary>
/// <param name="input">The bytes to map; must not be empty, as the underlying fast hash asserts a non-empty input.</param>
/// <returns>The fast hash of <paramref name="input"/> masked with the bucket mask.</returns>
public static uint GetBucket(ReadOnlySpan<byte> input)
{
    uint fast = FastHash(input);
    return fast & BucketMask;
}

/// <summary>
/// Computes a fast, non-cryptographic 32-bit hash of <paramref name="input"/> using CRC32C steps,
/// seeded with the per-process random value xor-ed with the first byte.
/// </summary>
/// <param name="input">The bytes to hash; must contain at least one byte.</param>
/// <returns>The 32-bit hash.</returns>
[SkipLocalsInit]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint FastHash(ReadOnlySpan<byte> input)
{
    Debug.Assert(input.Length >= 1, "Cannot hash empty");

    int remaining = input.Length;
    ref byte cursor = ref MemoryMarshal.GetReference(input);

    // The first byte always goes into the seed.
    uint crc = s_instanceRandom ^ cursor;

    // Branch-free handling of odd lengths: step over the first byte only when the length is odd.
    // For even lengths the cursor stays put, so the first byte is also consumed by the CRC steps below.
    int odd = remaining & 1;
    cursor = ref Unsafe.Add(ref cursor, odd);
    remaining -= odd;

    // Consume a 2-byte and then a 4-byte chunk if the corresponding bits of the length are set,
    // leaving a multiple of 8 bytes for the loop.
    if ((remaining & sizeof(ushort)) != 0)
    {
        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<ushort>(ref cursor));
        cursor = ref Unsafe.Add(ref cursor, sizeof(ushort));
        remaining -= sizeof(ushort);
    }

    if ((remaining & sizeof(uint)) != 0)
    {
        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<uint>(ref cursor));
        cursor = ref Unsafe.Add(ref cursor, sizeof(uint));
        remaining -= sizeof(uint);
    }

    // Whatever is left is consumed 8 bytes at a time.
    for (; remaining > 0; remaining -= sizeof(ulong))
    {
        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<ulong>(ref cursor));
        cursor = ref Unsafe.Add(ref cursor, sizeof(ulong));
    }

    return crc;
}

/// <summary>
/// A single cache slot with an explicit layout: the lock word, the hash-and-length tag,
/// the cached input bytes and, at the very end, the memoized Keccak value.
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = Size)]
private struct Entry
{
    /// <summary>
    /// Size of one entry in bytes. Should work for both ARM and x64 and be aligned.
    /// </summary>
    public const int Size = 128;

    /// <summary>Value of <see cref="Lock"/> when the entry is free.</summary>
    public const int Unlocked = 0;

    /// <summary>Value of <see cref="Lock"/> while a thread owns the entry.</summary>
    public const int Locked = 1;

    // The payload follows the two 4-byte header fields.
    private const int PayloadStart = sizeof(int) + sizeof(uint);

    // The value occupies the tail of the entry.
    private const int ValueStart = Size - ValueHash256.MemorySize;

    /// <summary>
    /// The longest input that can be cached: the space between the header and the value.
    /// </summary>
    public const int MaxPayloadLength = ValueStart - PayloadStart;

    /// <summary>
    /// The lock word, either <see cref="Unlocked"/> or <see cref="Locked"/>.
    /// </summary>
    [FieldOffset(0)]
    public int Lock;

    /// <summary>
    /// Hash and length packed into one uint, so that a single volatile read allows a fast comparison.
    /// The low part holds the length, the high part holds the hash.
    /// </summary>
    [FieldOffset(4)]
    public uint HashAndLength;

    /// <summary>
    /// The first byte of the cached input; the remaining bytes follow it, up to <see cref="MaxPayloadLength"/>.
    /// </summary>
    [FieldOffset(PayloadStart)]
    public byte Payload;

    /// <summary>
    /// The memoized Keccak of the payload.
    /// </summary>
    [FieldOffset(ValueStart)]
    public ValueHash256 Value;
}
}
3 changes: 2 additions & 1 deletion src/Nethermind/Nethermind.Evm/VirtualMachine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,8 @@ private CallResult ExecuteCode<TTracingInstructions, TTracingRefunds, TTracingSt
if (!UpdateMemoryCost(vmState, ref gasAvailable, in a, b)) goto OutOfGas;

bytes = vmState.Memory.LoadSpan(in a, b);
stack.PushBytes(ValueKeccak.Compute(bytes).BytesAsSpan);

stack.PushBytes(KeccakCache.Compute(bytes).BytesAsSpan);
break;
}
case Instruction.ADDRESS:
Expand Down
6 changes: 3 additions & 3 deletions src/Nethermind/Nethermind.State/StateTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ public StateTree(ITrieStore? store, ILogManager? logManager)
[DebuggerStepThrough]
public Account? Get(Address address, Hash256? rootHash = null)
{
ReadOnlySpan<byte> bytes = Get(ValueKeccak.Compute(address.Bytes).BytesAsSpan, rootHash);
ReadOnlySpan<byte> bytes = Get(KeccakCache.Compute(address.Bytes).BytesAsSpan, rootHash);
return bytes.IsEmpty ? null : _decoder.Decode(bytes);
}

[DebuggerStepThrough]
public bool TryGetStruct(Address address, out AccountStruct account, Hash256? rootHash = null)
{
ReadOnlySpan<byte> bytes = Get(ValueKeccak.Compute(address.Bytes).BytesAsSpan, rootHash);
ReadOnlySpan<byte> bytes = Get(KeccakCache.Compute(address.Bytes).BytesAsSpan, rootHash);
Rlp.ValueDecoderContext valueDecoderContext = new Rlp.ValueDecoderContext(bytes);
if (bytes.IsEmpty)
{
Expand All @@ -69,7 +69,7 @@ public bool TryGetStruct(Address address, out AccountStruct account, Hash256? ro

public void Set(Address address, Account? account)
{
ValueHash256 keccak = ValueKeccak.Compute(address.Bytes);
ValueHash256 keccak = KeccakCache.Compute(address.Bytes);
Set(keccak.BytesAsSpan, account is null ? null : account.IsTotallyEmpty ? EmptyAccountRlp : Rlp.Encode(account));
}

Expand Down
15 changes: 10 additions & 5 deletions src/Nethermind/Nethermind.State/StorageTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
using System.Collections.Frozen;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using Nethermind.Core.Crypto;
using Nethermind.Core.Extensions;
using Nethermind.Logging;
Expand Down Expand Up @@ -46,12 +48,15 @@ public StorageTree(IScopedTrieStore? trieStore, Hash256 rootHash, ILogManager? l
TrieType = TrieType.Storage;
}

private static void ComputeKey(in UInt256 index, ref Span<byte> key)
private static void ComputeKey(in UInt256 index, in Span<byte> key)
{
index.ToBigEndian(key);

// in situ calculation
KeccakHash.ComputeHashBytesToSpan(key, key);
ValueHash256 keyHash = KeccakCache.Compute(key);

// Assign to update the argument
Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(key))
= Unsafe.As<ValueHash256, Vector256<byte>>(ref keyHash);
}

[SkipLocalsInit]
Expand All @@ -63,7 +68,7 @@ public byte[] Get(in UInt256 index, Hash256? storageRoot = null)
}

Span<byte> key = stackalloc byte[32];
ComputeKey(index, ref key);
ComputeKey(index, key);
return GetArray(key, storageRoot);
}

Expand Down Expand Up @@ -92,7 +97,7 @@ public void Set(in UInt256 index, byte[] value)
else
{
Span<byte> key = stackalloc byte[32];
ComputeKey(index, ref key);
ComputeKey(index, in key);
SetInternal(key, value);
}
}
Expand Down