Skip to content

Commit

Permalink
Revert "Replace std::hash with our custom hash function"
Browse files Browse the repository at this point in the history
  • Loading branch information
mewim committed Feb 28, 2024
1 parent 2f15b6a commit 15bc801
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 76 deletions.
36 changes: 7 additions & 29 deletions src/include/function/hash/hash_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#include "common/types/int128_t.h"
#include "common/types/interval_t.h"
#include "common/types/ku_string.h"
#include "common/types/types.h"
#include "common/vector/value_vector.h"

namespace kuzu {
Expand Down Expand Up @@ -134,52 +133,31 @@ inline void Hash::operation(
template<>
inline void Hash::operation(
const double& key, common::hash_t& result, common::ValueVector* /*keyVector*/) {
// 0 and -0 are not byte-equivalent, but should have the same hash
if (key == 0) {
result = murmurhash64(0);
} else {
result = murmurhash64(*reinterpret_cast<const uint64_t*>(&key));
}
result = std::hash<double>()(key);
}

template<>
inline void Hash::operation(
const float& key, common::hash_t& result, common::ValueVector* /*keyVector*/) {
// 0 and -0 are not byte-equivalent, but should have the same hash
if (key == 0) {
result = murmurhash64(0);
} else {
result = murmurhash64(*reinterpret_cast<const uint32_t*>(&key));
}
result = std::hash<float>()(key);
}

template<>
inline void Hash::operation(
const std::string_view& key, common::hash_t& result, common::ValueVector* /*keyVector*/) {
common::hash_t hashValue = 0;
auto data64 = reinterpret_cast<const uint64_t*>(key.data());
for (size_t i = 0u; i < key.size() / 8; i++) {
auto blockHash = kuzu::function::murmurhash64(*(data64 + i));
hashValue = kuzu::function::combineHashScalar(hashValue, blockHash);
}
uint64_t last = 0;
for (size_t i = 0u; i < key.size() % 8; i++) {
last |= key[key.size() / 8 * 8 + i] << i * 8;
}
hashValue = kuzu::function::combineHashScalar(hashValue, kuzu::function::murmurhash64(last));
result = hashValue;
const std::string& key, common::hash_t& result, common::ValueVector* /*keyVector*/) {
result = std::hash<std::string>()(key);
}

template<>
inline void Hash::operation(
const std::string& key, common::hash_t& result, common::ValueVector* /*keyVector*/) {
Hash::operation(std::string_view(key), result);
const std::string_view& key, common::hash_t& result, common::ValueVector* /*keyVector*/) {
result = std::hash<std::string_view>()(key);
}

template<>
inline void Hash::operation(
const common::ku_string_t& key, common::hash_t& result, common::ValueVector* /*keyVector*/) {
Hash::operation(key.getAsStringView(), result);
result = std::hash<std::string_view>()(key.getAsStringView());
}

template<>
Expand Down
6 changes: 3 additions & 3 deletions src/include/processor/operator/persistent/index_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,18 @@ class IndexBuilderLocalBuffers {
explicit IndexBuilderLocalBuffers(IndexBuilderGlobalQueues& globalQueues);

void insert(std::string key, common::offset_t value) {
auto indexPos = storage::HashIndexUtils::getHashIndexPosition(std::string_view(key));
auto indexPos = storage::getHashIndexPosition(key.c_str());
auto& stringBuffer = (*std::get<UniqueBuffers<std::string>>(buffers))[indexPos];
if (stringBuffer.full()) {
// StaticVector's move constructor leaves the original vector valid and empty
// StaticVector's move constructor leavse the original vector valid and empty
globalQueues->insert(indexPos, std::move(stringBuffer));
}
stringBuffer.push_back(std::make_pair(key, value)); // NOLINT(bugprone-use-after-move)
}

template<common::HashablePrimitive T>
void insert(T key, common::offset_t value) {
auto indexPos = storage::HashIndexUtils::getHashIndexPosition(key);
auto indexPos = storage::getHashIndexPosition(key);
auto& buffer = (*std::get<UniqueBuffers<T>>(buffers))[indexPos];
if (buffer.full()) {
globalQueues->insert(indexPos, std::move(buffer));
Expand Down
2 changes: 1 addition & 1 deletion src/include/storage/index/hash_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ class PrimaryKeyIndex {
template<typename T, typename S = T>
inline HashIndex<T, S>* getTypedHashIndex(T key) {
return common::ku_dynamic_cast<OnDiskHashIndex*, HashIndex<T, S>*>(
hashIndices[HashIndexUtils::getHashIndexPosition(key)].get());
hashIndices[getHashIndexPosition(key)].get());
}

inline bool lookup(
Expand Down
13 changes: 6 additions & 7 deletions src/include/storage/index/hash_index_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,32 +138,31 @@ class PrimaryKeyIndexBuilder {
// enough space already.
template<common::HashablePrimitive T>
bool append(T key, common::offset_t value) {
return appendWithIndexPos(key, value, HashIndexUtils::getHashIndexPosition(key));
return appendWithIndexPos(key, value, getHashIndexPosition(key));
}
bool append(std::string_view key, common::offset_t value) {
return appendWithIndexPos(key, value, HashIndexUtils::getHashIndexPosition(key));
return appendWithIndexPos(key, value, getHashIndexPosition(key));
}
template<common::HashablePrimitive T>
bool appendWithIndexPos(T key, common::offset_t value, uint64_t indexPos) {
KU_ASSERT(keyDataTypeID == common::TypeUtils::getPhysicalTypeIDForType<T>());
KU_ASSERT(HashIndexUtils::getHashIndexPosition(key) == indexPos);
KU_ASSERT(getHashIndexPosition(key) == indexPos);
return getTypedHashIndex<T>(indexPos)->append(key, value);
}
bool appendWithIndexPos(std::string_view key, common::offset_t value, uint64_t indexPos) {
KU_ASSERT(keyDataTypeID == common::PhysicalTypeID::STRING);
KU_ASSERT(HashIndexUtils::getHashIndexPosition(key) == indexPos);
KU_ASSERT(getHashIndexPosition(key) == indexPos);
return getTypedHashIndex<std::string_view, common::ku_string_t>(indexPos)->append(
key, value);
}
template<common::HashablePrimitive T>
bool lookup(T key, common::offset_t& result) {
KU_ASSERT(keyDataTypeID == common::TypeUtils::getPhysicalTypeIDForType<T>());
return getTypedHashIndex<T>(HashIndexUtils::getHashIndexPosition(key))->lookup(key, result);
return getTypedHashIndex<T>(getHashIndexPosition(key))->lookup(key, result);

Check warning on line 161 in src/include/storage/index/hash_index_builder.h

View check run for this annotation

Codecov / codecov/patch

src/include/storage/index/hash_index_builder.h#L161

Added line #L161 was not covered by tests
}
bool lookup(std::string_view key, common::offset_t& result) {
KU_ASSERT(keyDataTypeID == common::PhysicalTypeID::STRING);
return getTypedHashIndex<std::string_view, common::ku_string_t>(
HashIndexUtils::getHashIndexPosition(key))
return getTypedHashIndex<std::string_view, common::ku_string_t>(getHashIndexPosition(key))
->lookup(key, result);
}

Expand Down
16 changes: 12 additions & 4 deletions src/include/storage/index/hash_index_utils.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <functional>

#include "common/types/ku_string.h"
#include "common/types/types.h"
#include "function/hash/hash_functions.h"
Expand All @@ -26,6 +28,16 @@ static constexpr common::page_idx_t O_SLOTS_HEADER_PAGE_IDX = 2;
static constexpr common::page_idx_t NUM_HEADER_PAGES = 3;
static constexpr uint64_t INDEX_HEADER_IDX_IN_ARRAY = 0;

inline uint64_t getHashIndexPosition(common::HashablePrimitive auto key) {
common::hash_t hash;
function::Hash::operation(key, hash, nullptr /*keyVector*/);
return (hash >> (64 - NUM_HASH_INDEXES_LOG2)) & (NUM_HASH_INDEXES - 1);
}
inline uint64_t getHashIndexPosition(std::string_view key) {
return (std::hash<std::string_view>()(key) >> (64 - NUM_HASH_INDEXES_LOG2)) &
(NUM_HASH_INDEXES - 1);
}

enum class SlotType : uint8_t { PRIMARY = 0, OVF = 1 };

struct SlotInfo {
Expand Down Expand Up @@ -65,10 +77,6 @@ class HashIndexUtils {
return slotId;
}

inline static uint64_t getHashIndexPosition(common::IndexHashable auto key) {
return (HashIndexUtils::hash(key) >> (64 - NUM_HASH_INDEXES_LOG2)) & (NUM_HASH_INDEXES - 1);
}

static inline uint64_t getNumRequiredEntries(
uint64_t numExistingEntries, uint64_t numNewEntries) {
return ceil((double)(numExistingEntries + numNewEntries) * common::DEFAULT_HT_LOAD_FACTOR);
Expand Down
32 changes: 0 additions & 32 deletions test/test_files/update_node/create_empty.test
Original file line number Diff line number Diff line change
Expand Up @@ -353,38 +353,6 @@ foobar-ewe-j323-8nd*-ewew
-STATEMENT MATCH (t:test) WHERE t.id = to_float(0.1) RETURN t.id;
---- 1
0.100000
-STATEMENT CREATE (t:test {id:0});
---- ok
# Zero and negative zero need to be equivalent
-STATEMENT MATCH (t:test) WHERE t.id = (0.0 * -1) RETURN t.id;
---- 1
0.000000
-STATEMENT MATCH (t:test) WHERE t.id = -0.0 RETURN t.id;
---- 1
0.000000
-STATEMENT MATCH (t:test) WHERE t.id = to_float("-0.0") RETURN t.id;
---- 1
0.000000
# infinity should work
-STATEMENT CREATE (t:test {id:1.0/0.0});
---- ok
-STATEMENT MATCH (t:test) WHERE t.id = 1.0/0.0 RETURN t.id;
---- 1
inf
-STATEMENT MATCH (t:test) WHERE t.id = to_float("inf") RETURN t.id;
---- 1
inf
-STATEMENT MATCH (t:test) WHERE t.id = to_float("-inf") RETURN t.id;
---- 0
# NaN should not be searchable
-STATEMENT CREATE (t:test {id:0.0/0.0});
---- ok
-STATEMENT MATCH (t:test) WHERE t.id = 0.0/0.0 RETURN t.id;
---- 0
-STATEMENT CREATE (t:test {id:sqrt(-1.0)});
---- ok
-STATEMENT MATCH (t:test) WHERE t.id = sqrt(-1.0) RETURN t.id;
---- 0

-CASE CreateBlobPK
-STATEMENT CREATE NODE TABLE test(id BLOB, PRIMARY KEY(id));
Expand Down

0 comments on commit 15bc801

Please sign in to comment.