Skip to content

Commit

Permalink
Length 3 & 4 fast hashes (#1321)
Browse files Browse the repository at this point in the history
  • Loading branch information
stephenberry committed Sep 23, 2024
1 parent 963d79b commit c6dddea
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 6 deletions.
2 changes: 1 addition & 1 deletion include/glaze/binary/read.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1253,7 +1253,7 @@ namespace glz
if (bool(ctx.error)) [[unlikely]] {
return;
}
if (uint64_t(end - it) < n) [[unlikely]] {
if (uint64_t(end - it) < n || it == end) [[unlikely]] {
ctx.error = error_code::unexpected_end;
return;
}
Expand Down
137 changes: 132 additions & 5 deletions include/glaze/core/refl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -832,10 +832,16 @@ namespace glz::detail
unique_index, // A unique character index is used
front_hash, // Hash on the front bytes of the key
single_element, // Map is a single element
mod4, // c % 4
xor_mod4, // (c ^ c0) % 4
minus_mod4, // (c - c0) % 4
three_element_unique_index,
unique_per_length, // Hash on a unique character index and the length of the key
full_flat // Full key hash with a single table
};

// For N == 3 and N == 4 it is cheap to check mod4, xor_mod4, and minus_mod4 hashes.
// Consecutive values like "x", "y", "z" for keys work with minus_mod4

struct unique_per_length_t
{
Expand Down Expand Up @@ -932,6 +938,15 @@ namespace glz::detail
case single_element: {
return 0;
}
case mod4: {
return 0;
}
case xor_mod4: {
return 0;
}
case minus_mod4: {
return 0;
}
case three_element_unique_index: {
return 0;
}
Expand Down Expand Up @@ -1309,6 +1324,45 @@ namespace glz::detail
}

// N == 2 is optimized within other hashing methods

// Cheap first-character hashes for maps with exactly 3 or 4 keys. Three schemes
// are tried in order: c % 4, (c ^ c0) % 4 and (c - c0) % 4, where c is a key's
// first character and c0 is the first character of keys[0]. A scheme is accepted
// only when it maps every key i to exactly the value i; the first scheme that
// fits is recorded in info.type and info is returned immediately.
if constexpr (N == 3 || N == 4) {
// NOTE(review): presumably min_length is the shortest key length, so this guard
// ensures keys[i][0] exists for every key - confirm against the code that fills info.
if (info.min_length > 0) {
// Scheme 1: first character modulo 4 must equal the key's index.
bool valid = true;
for (size_t i = 0; i < N; ++i) {
if (keys[i][0] % 4 != uint8_t(i)) {
valid = false;
}
}
if (valid) {
info.type = mod4;
return info;
}

// Reference character shared by the two remaining schemes.
const auto c0 = keys[0][0];

// Scheme 2: XOR with the reference character, then modulo 4.
valid = true;
for (size_t i = 0; i < N; ++i) {
if ((keys[i][0] ^ c0) % 4 != uint8_t(i)) {
valid = false;
}
}
if (valid) {
info.type = xor_mod4;
return info;
}

// Scheme 3: offset from the reference character, then modulo 4.
valid = true;
for (size_t i = 0; i < N; ++i) {
if ((keys[i][0] - c0) % 4 != uint8_t(i)) {
valid = false;
}
}
if (valid) {
info.type = minus_mod4;
return info;
}
// None of the three schemes fit; control falls through to the code after this block.
}
}

auto& seed = info.seed;
constexpr uint64_t invalid_seed = 0;
Expand Down Expand Up @@ -1336,12 +1390,11 @@ namespace glz::detail
return info;
}
}
// Otherwise we failed to find a seed and we'll use another algorithm
}
else {
info.type = unique_index;
return info;
// Otherwise we failed to find a seed and we'll use a normal unique_index hash
}

info.type = unique_index;
return info;
}

if (front_bytes_hash_info<uint16_t>(keys, info)) {
Expand Down Expand Up @@ -1507,6 +1560,24 @@ namespace glz::detail
info.max_length = k_info.max_length;
return info;
}
else if constexpr (type == mod4) {
hash_info_t<T, bucket_size(mod4, N)> info{.type = mod4};
info.min_length = k_info.min_length;
info.max_length = k_info.max_length;
return info;
}
else if constexpr (type == xor_mod4) {
hash_info_t<T, bucket_size(xor_mod4, N)> info{.type = xor_mod4};
info.min_length = k_info.min_length;
info.max_length = k_info.max_length;
return info;
}
else if constexpr (type == minus_mod4) {
hash_info_t<T, bucket_size(minus_mod4, N)> info{.type = minus_mod4};
info.min_length = k_info.min_length;
info.max_length = k_info.max_length;
return info;
}
else if constexpr (type == three_element_unique_index) {
hash_info_t<T, bucket_size(three_element_unique_index, N)> info{.type = three_element_unique_index};
info.min_length = k_info.min_length;
Expand Down Expand Up @@ -1649,6 +1720,34 @@ namespace glz::detail
{
GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& /*it*/, auto&& /*end*/) noexcept { return 0; }
};

// JSON key lookup for hash_type::mod4: the table index is the first key byte modulo 4.
template <class T, auto HashInfo>
struct decode_hash<json, T, HashInfo, hash_type::mod4>
{
   // Reads the byte at `it` and reduces it to a value in [0, 3].
   // `end` is not consulted, so no bounds check happens here.
   GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&& /*end*/) noexcept
   {
      const auto lead_byte = static_cast<uint8_t>(*it);
      return lead_byte % 4;
   }
};

// JSON key lookup for hash_type::xor_mod4: the table index is
// (first input byte XOR first byte of keys[0]) modulo 4.
template <class T, auto HashInfo>
struct decode_hash<json, T, HashInfo, hash_type::xor_mod4>
{
   // First character of the first reflected key; the reference value for the XOR.
   static constexpr auto first_key_char = refl<T>.keys[0][0];

   // Returns a value in [0, 3] computed from the byte at `it`; `end` is not consulted.
   GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&& /*end*/) noexcept
   {
      // Both operands are normalised to uint8_t before the XOR. If first_key_char is a
      // (possibly signed) char holding a negative value, integral promotion would
      // sign-extend it and the XOR with an unsigned byte would be negative, producing a
      // negative remainder that converts to an out-of-range size_t.
      // NOTE(review): assumes first_key_char is a character type - confirm.
      return (uint8_t(*it) ^ uint8_t(first_key_char)) % 4;
   }
};

// JSON key lookup for hash_type::minus_mod4: the table index is
// (first input byte - first byte of keys[0]) modulo 4.
template <class T, auto HashInfo>
struct decode_hash<json, T, HashInfo, hash_type::minus_mod4>
{
   // First character of the first reflected key; the reference value for the offset.
   static constexpr auto first_key_char = refl<T>.keys[0][0];

   // Returns a value in [0, 3] computed from the byte at `it`; `end` is not consulted.
   GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&& /*end*/) noexcept
   {
      // The subtraction is performed in int after promotion, and C++ `%` keeps the sign
      // of the dividend. An input byte below first_key_char would therefore give a
      // negative remainder, which converts to a huge size_t instead of an index in
      // [0, 3]. Wrapping the difference to uint8_t keeps the result in range and leaves
      // every non-negative difference (i.e. every matching key) unchanged.
      return uint8_t(uint8_t(*it) - uint8_t(first_key_char)) % 4;
   }
};

template <class T, auto HashInfo>
struct decode_hash<json, T, HashInfo, hash_type::unique_index>
Expand Down Expand Up @@ -1842,6 +1941,34 @@ namespace glz::detail
{
GLZ_ALWAYS_INLINE static constexpr size_t op(auto&&, auto&&, const size_t) noexcept { return 0; }
};

// Sized key lookup for hash_type::mod4 (any Format): the index is the first key byte modulo 4.
template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::mod4>
{
   // Only the byte at `it` is used; the end iterator and the key size are ignored.
   GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&&, const size_t) noexcept
   {
      const auto lead_byte = static_cast<uint8_t>(*it);
      return lead_byte % 4;
   }
};

// Sized key lookup for hash_type::xor_mod4 (any Format): the index is
// (first input byte XOR first byte of keys[0]) modulo 4.
template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::xor_mod4>
{
   // First character of the first reflected key; the reference value for the XOR.
   static constexpr auto first_key_char = refl<T>.keys[0][0];

   // Only the byte at `it` is used; the end iterator and the key size are ignored.
   GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&&, const size_t) noexcept
   {
      // Both operands are normalised to uint8_t before the XOR. If first_key_char is a
      // (possibly signed) char holding a negative value, integral promotion would
      // sign-extend it and the XOR with an unsigned byte would be negative, producing a
      // negative remainder that converts to an out-of-range size_t.
      // NOTE(review): assumes first_key_char is a character type - confirm.
      return (uint8_t(*it) ^ uint8_t(first_key_char)) % 4;
   }
};

// Sized key lookup for hash_type::minus_mod4 (any Format): the index is
// (first input byte - first byte of keys[0]) modulo 4.
template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::minus_mod4>
{
   // First character of the first reflected key; the reference value for the offset.
   static constexpr auto first_key_char = refl<T>.keys[0][0];

   // Only the byte at `it` is used; the end iterator and the key size are ignored.
   GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&&, const size_t) noexcept
   {
      // The subtraction is performed in int after promotion, and C++ `%` keeps the sign
      // of the dividend. An input byte below first_key_char would therefore give a
      // negative remainder, which converts to a huge size_t instead of an index in
      // [0, 3]. Wrapping the difference to uint8_t keeps the result in range and leaves
      // every non-negative difference (i.e. every matching key) unchanged.
      return uint8_t(uint8_t(*it) - uint8_t(first_key_char)) % 4;
   }
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::unique_index>
Expand Down
15 changes: 15 additions & 0 deletions tests/json_test/json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9511,6 +9511,21 @@ suite meta_keys_tests = [] {
};
};

// Plain aggregate with three int coordinates, each value-initialized to zero.
struct point3d
{
   int x{};
   int y{};
   int z{};
};

// Reads a three-key JSON object into point3d. The keys' first bytes 'x', 'y', 'z'
// are 120, 121, 122, i.e. 0, 1, 2 modulo 4.
suite simple_mod_hashes = [] {
   "mod hash"_test = [] {
      point3d pt{};
      // A falsy result from read_json means the parse reported no error.
      expect(!glz::read_json(pt, R"({"x":1,"y":2,"z":3})"));
      // Every member must receive the value stored under its own key.
      expect(pt.x == 1);
      expect(pt.y == 2);
      expect(pt.z == 3);
   };
};

int main()
{
trace.end("json_test");
Expand Down

0 comments on commit c6dddea

Please sign in to comment.