Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to compute hash on list of struct #3157

Merged
merged 2 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 64 additions & 22 deletions src/function/vector_hash_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,68 @@
namespace kuzu {
namespace function {

/**
 * Hashes every element stored in the data vector of the list vector `operand`.
 *
 * The elements are hashed in batches of at most DEFAULT_VECTOR_CAPACITY values: each batch is
 * described by a selection buffer holding the data-vector positions to hash, and
 * VectorHashFunction::computeHash is invoked once per batch.
 *
 * Side effect: the state of `operand`'s data vector is replaced by a locally created
 * DataChunkState and is not restored afterwards.
 *
 * @param operand list vector whose data-vector elements are hashed.
 * @return a list-typed vector whose data vector holds one hash per element of `operand`'s data
 *         vector, stored at the same position.
 */
static std::unique_ptr<ValueVector> computeDataVecHash(ValueVector* operand) {
    auto hashVector = std::make_unique<ValueVector>(*LogicalType::VAR_LIST(LogicalType::HASH()));
    auto numValuesInDataVec = ListVector::getDataVectorSize(operand);
    ListVector::resizeDataVector(hashVector.get(), numValuesInDataVec);
    auto selectionState = std::make_shared<DataChunkState>();
    selectionState->selVector->resetSelectorToValuePosBuffer();
    auto operandDataVec = ListVector::getDataVector(operand);
    auto hashDataVec = ListVector::getDataVector(hashVector.get());
    operandDataVec->setState(selectionState);
    auto numValuesComputed = 0u;
    while (numValuesComputed < numValuesInDataVec) {
        // Fill at most one batch, and never select a position past the end of the data vector:
        // the last batch is usually shorter than DEFAULT_VECTOR_CAPACITY.
        auto numValuesInBatch = 0u;
        while (numValuesInBatch < DEFAULT_VECTOR_CAPACITY &&
               numValuesComputed < numValuesInDataVec) {
            selectionState->selVector->selectedPositions[numValuesInBatch] = numValuesComputed;
            numValuesInBatch++;
            numValuesComputed++;
        }
        // Tell the hash executor how many of the selected positions are valid for this batch.
        // NOTE(review): assumes SelectionVector exposes a writable `selectedSize` next to
        // `selectedPositions` -- confirm against the SelectionVector declaration.
        selectionState->selVector->selectedSize = numValuesInBatch;
        VectorHashFunction::computeHash(operandDataVec, hashDataVec);
    }
    return hashVector;
}

Check warning on line 27 in src/function/vector_hash_functions.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/vector_hash_functions.cpp#L27

Added line #L27 was not covered by tests

/**
 * Folds the per-element hashes in `hashVec` into one hash per list of `operand`.
 *
 * For every selected position of `operand`, a null list is assigned NULL_HASH; otherwise the
 * hashes of the list's elements (read from the data vector of `hashVec` at
 * [entry.offset, entry.offset + entry.size)) are combined left-to-right with combineHashScalar,
 * starting from NULL_HASH. The outcome is written to `result` at the same position.
 *
 * @param operand list vector being hashed.
 * @param result  vector receiving one hash per selected list.
 * @param hashVec list vector whose data vector holds the element hashes.
 */
static void finalizeDataVecHash(ValueVector* operand, ValueVector* result, ValueVector* hashVec) {
    auto elementHashes = ListVector::getDataVector(hashVec);
    const auto numSelected = result->state->getNumSelectedValues();
    for (auto selIdx = 0u; selIdx < numSelected; ++selIdx) {
        const auto pos = operand->state->selVector->selectedPositions[selIdx];
        const auto entry = operand->getValue<list_entry_t>(pos);
        if (operand->isNull(pos)) {
            // NOTE: Codecov reported this null-list branch as not covered by tests.
            result->setValue(pos, NULL_HASH);
            continue;
        }
        auto listHash = NULL_HASH;
        for (auto elemIdx = 0u; elemIdx < entry.size; ++elemIdx) {
            listHash = combineHashScalar(
                listHash, elementHashes->getValue<hash_t>(entry.offset + elemIdx));
        }
        result->setValue(pos, listHash);
    }
}

/**
 * Computes the hash of every selected list in `operand` and stores it in `result`.
 *
 * Works in two steps: first the hashes of all elements in the list's data vector are computed,
 * then those element hashes are combined into one hash per list.
 */
static void computeListVectorHash(ValueVector* operand, ValueVector* result) {
    const std::unique_ptr<ValueVector> elementHashVector = computeDataVecHash(operand);
    finalizeDataVecHash(operand, result, elementHashVector.get());
}

/**
 * Computes the hash of a struct-like vector `operand` into `result`.
 *
 * - NODE: only the internal ID field (asserted to be field 0) is hashed.
 * - REL:  only the internal ID field (asserted to be field 3) is hashed.
 * - Any other struct: field 0 is hashed into `result`, then the hash of each following field is
 *   computed into a scratch vector and combined into `result`.
 */
static void computeStructVecHash(ValueVector* operand, ValueVector* result) {
    switch (operand->dataType.getLogicalTypeID()) {
    case LogicalTypeID::NODE: {
        KU_ASSERT(0 == common::StructType::getFieldIdx(&operand->dataType, InternalKeyword::ID));
        UnaryHashFunctionExecutor::execute<internalID_t, hash_t>(
            *StructVector::getFieldVector(operand, 0), *result);
    } break;
    case LogicalTypeID::REL: {
        KU_ASSERT(3 == StructType::getFieldIdx(&operand->dataType, InternalKeyword::ID));
        UnaryHashFunctionExecutor::execute<internalID_t, hash_t>(
            *StructVector::getFieldVector(operand, 3), *result);
    } break;
    default: {
        // Seed the result with the hash of the first field.
        VectorHashFunction::computeHash(
            StructVector::getFieldVector(operand, 0 /* idx */).get(), result);
        // Scratch vector reused for the hash of every remaining field.
        auto scratchHashVector = std::make_unique<ValueVector>(LogicalTypeID::INT64);
        const auto numFields = StructType::getNumFields(&operand->dataType);
        for (auto fieldIdx = 1u; fieldIdx < numFields; fieldIdx++) {
            auto currentField = StructVector::getFieldVector(operand, fieldIdx);
            VectorHashFunction::computeHash(currentField.get(), scratchHashVector.get());
            VectorHashFunction::combineHash(scratchHashVector.get(), result, result);
        }
    } break;
    }
}

void VectorHashFunction::computeHash(ValueVector* operand, ValueVector* result) {
result->state = operand->state;
KU_ASSERT(result->dataType.getLogicalTypeID() == LogicalTypeID::INT64);
Expand Down Expand Up @@ -57,30 +119,10 @@
UnaryHashFunctionExecutor::execute<interval_t, hash_t>(*operand, *result);
} break;
case PhysicalTypeID::STRUCT: {
if (operand->dataType.getLogicalTypeID() == LogicalTypeID::NODE) {
KU_ASSERT(
0 == common::StructType::getFieldIdx(&operand->dataType, InternalKeyword::ID));
UnaryHashFunctionExecutor::execute<internalID_t, hash_t>(
*StructVector::getFieldVector(operand, 0), *result);
} else if (operand->dataType.getLogicalTypeID() == LogicalTypeID::REL) {
KU_ASSERT(3 == StructType::getFieldIdx(&operand->dataType, InternalKeyword::ID));
UnaryHashFunctionExecutor::execute<internalID_t, hash_t>(
*StructVector::getFieldVector(operand, 3), *result);
} else {
VectorHashFunction::computeHash(
StructVector::getFieldVector(operand, 0 /* idx */).get(), result);
auto tmpHashVector = std::make_unique<ValueVector>(LogicalTypeID::INT64);
for (auto i = 1u; i < StructType::getNumFields(&operand->dataType); i++) {
auto fieldVector = StructVector::getFieldVector(operand, i);
VectorHashFunction::computeHash(fieldVector.get(), tmpHashVector.get());
VectorHashFunction::combineHash(tmpHashVector.get(), result, result);
}
}
computeStructVecHash(operand, result);
} break;
case PhysicalTypeID::VAR_LIST: {
// TODO(Ziyi): We should pass in the selection state here, and do vectorized hash
// computation.
UnaryHashFunctionExecutor::execute<list_entry_t, hash_t>(*operand, *result);
computeListVectorHash(operand, result);
} break;
// LCOV_EXCL_START
default: {
Expand Down
3 changes: 3 additions & 0 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,9 @@ class LogicalType {
// Factory: returns a newly allocated LogicalType whose type ID is BOOL.
static std::unique_ptr<LogicalType> BOOL() {
    const auto typeID = LogicalTypeID::BOOL;
    return std::make_unique<LogicalType>(typeID);
}
// Factory: returns a newly allocated LogicalType for hash values, which are represented with the
// INT64 type ID.
static std::unique_ptr<LogicalType> HASH() {
    const auto hashTypeID = LogicalTypeID::INT64;
    return std::make_unique<LogicalType>(hashTypeID);
}
// Factory: returns a newly allocated LogicalType whose type ID is INT64.
static std::unique_ptr<LogicalType> INT64() {
    const auto typeID = LogicalTypeID::INT64;
    return std::make_unique<LogicalType>(typeID);
}
Expand Down
26 changes: 0 additions & 26 deletions src/include/function/hash/hash_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,32 +189,6 @@ inline void Hash::operation(
combineHashScalar(murmurhash64(key.days), murmurhash64(key.micros)));
}

// Specialization of Hash::operation for list entries.
// Hashes the list described by `key` by combining the hashes of its elements one by one.
//   key       - list entry; supplies the offset and size of the list within the data vector.
//   result    - receives the combined hash; an empty list leaves it at NULL_HASH.
//   keyVector - list vector that owns the entry; its data vector holds the elements.
// Throws common::RuntimeException when the elements have a struct physical type.
template<>
inline void Hash::operation(
const common::list_entry_t& key, common::hash_t& result, common::ValueVector* keyVector) {
// The elements of the list are read from the data vector attached to keyVector.
auto dataVector = common::ListVector::getDataVector(keyVector);
// Start from NULL_HASH and fold in one element hash per iteration.
result = NULL_HASH;
common::hash_t tmpResult;
for (auto i = 0u; i < key.size; i++) {
// Position of the i-th element of this list inside the data vector.
auto pos = key.offset + i;
if (dataVector->isNull(pos)) {
// A null element contributes NULL_HASH.
result = combineHashScalar(result, NULL_HASH);
} else {
// Dispatch on the element's physical type: hashable types are hashed into tmpResult by the
// matching Hash::operation overload (with dataVector passed as the key vector), struct
// elements are rejected, and any other type is treated as unreachable.
common::TypeUtils::visit(
dataVector->dataType.getPhysicalType(),
[&]<HashableTypes T>(
T) { operation(dataVector->getValue<T>(pos), tmpResult, dataVector); },
[&](common::struct_entry_t) {
// LCOV_EXCL_START
throw common::RuntimeException{"Hash on list of struct is not supported yet."};
// LCOV_EXCL_STOP
},
[](auto) { KU_UNREACHABLE; });
// Fold the element's hash into the running list hash.
result = combineHashScalar(result, tmpResult);
}
}
}

template<>
inline void Hash::operation(const std::unordered_set<std::string>& key, common::hash_t& result,
common::ValueVector* /*keyVector*/) {
Expand Down
23 changes: 23 additions & 0 deletions test/test_files/tinysnb/agg/hash.test
Original file line number Diff line number Diff line change
Expand Up @@ -546,3 +546,26 @@ True|2
{rating: 7.000000, stars: 10, views: 982, release: 2018-11-13 13:33:11, release_ns: 2018-11-13 13:33:11.123456, release_ms: 2018-11-13 13:33:11.123, release_sec: 2018-11-13 13:33:11, release_tz: 2018-11-13 13:33:11.123456+00, film: 2014-09-12, u8: 12, u16: 120, u32: 55, u64: 1, hugedata: -1844674407370955161511}|2
{rating: 1223.000000, stars: 100, views: 10003, release: 2011-02-11 16:44:22, release_ns: 2011-02-11 16:44:22.123456, release_ms: 2011-02-11 16:44:22.123, release_sec: 2011-02-11 16:44:22, release_tz: 2011-02-11 16:44:22.123456+00, film: 2013-02-22, u8: 1, u16: 15, u32: 200, u64: 4, hugedata: -15}|2
{rating: 55.000000, stars: 2, views: 88, release: 2022-01-22 00:00:00, release_ns: 2025-01-13 13:33:11.123456, release_ms: 2018-11-13 13:33:11.123, release_sec: 2011-01-11 00:00:00, release_tz: 2011-11-11 00:00:00+00, film: 2022-01-11, u8: 3, u16: 22, u32: 22, u64: 56, hugedata: 999999}|1

-LOG HashOnListOfStruct
-STATEMENT MATCH (o:organisation) RETURN [o.state], count(*)
---- 4
[{revenue: 138, location: ['toronto','montr,eal'], stock: {price: [96,56], volume: 1000}}]|2
[{revenue: 152, location: ["vanco,uver north area"], stock: {price: [15,78,671], volume: 432}}]|1
[{revenue: 55, location: ['toronto'], stock: {price: [22,33], volume: 28}}]|1
[{revenue: 558, location: ['very long city name','new york'], stock: {price: [22], volume: 99}}]|2
-STATEMENT MATCH (m:movies) RETURN [m.description], count(*)
---- 4
[{rating: 5.300000, stars: 2, views: 152, release: 2011-08-20 11:25:30, release_ns: 2011-08-20 11:25:30.123456, release_ms: 2011-08-20 11:25:30.123, release_sec: 2011-08-20 11:25:30, release_tz: 2011-08-20 11:25:30.123456+00, film: 2012-05-11, u8: 220, u16: 20, u32: 1, u64: 180, hugedata: 1844674407370955161811111111}]|1
[{rating: 7.000000, stars: 10, views: 982, release: 2018-11-13 13:33:11, release_ns: 2018-11-13 13:33:11.123456, release_ms: 2018-11-13 13:33:11.123, release_sec: 2018-11-13 13:33:11, release_tz: 2018-11-13 13:33:11.123456+00, film: 2014-09-12, u8: 12, u16: 120, u32: 55, u64: 1, hugedata: -1844674407370955161511}]|2
[{rating: 1223.000000, stars: 100, views: 10003, release: 2011-02-11 16:44:22, release_ns: 2011-02-11 16:44:22.123456, release_ms: 2011-02-11 16:44:22.123, release_sec: 2011-02-11 16:44:22, release_tz: 2011-02-11 16:44:22.123456+00, film: 2013-02-22, u8: 1, u16: 15, u32: 200, u64: 4, hugedata: -15}]|2
[{rating: 55.000000, stars: 2, views: 88, release: 2022-01-22 00:00:00, release_ns: 2025-01-13 13:33:11.123456, release_ms: 2018-11-13 13:33:11.123, release_sec: 2011-01-11 00:00:00, release_tz: 2011-11-11 00:00:00+00, film: 2022-01-11, u8: 3, u16: 22, u32: 22, u64: 56, hugedata: 999999}]|1
-STATEMENT MATCH (o:organisation) RETURN distinct [o.state];
---- 4
[{revenue: 138, location: ['toronto','montr,eal'], stock: {price: [96,56], volume: 1000}}]
[{revenue: 152, location: ["vanco,uver north area"], stock: {price: [15,78,671], volume: 432}}]
[{revenue: 55, location: ['toronto'], stock: {price: [22,33], volume: 28}}]
[{revenue: 558, location: ['very long city name','new york'], stock: {price: [22], volume: 99}}]
-STATEMENT MATCH (p:person) return distinct collect(p);
---- 1
[{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]], grades: [96,54,86,92], height: 1.731000, u: a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11},{_ID: 0:1, _LABEL: person, ID: 2, fName: Bob, gender: 2, isStudent: True, isWorker: False, age: 30, eyeSight: 5.100000, birthdate: 1900-01-01, registerTime: 2008-11-03 15:25:30.000526, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [12,8], usedNames: [Bobby], courseScoresPerTerm: [[8,9],[9,10]], grades: [98,42,93,88], height: 0.990000, u: a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a12},{_ID: 0:2, _LABEL: person, ID: 3, fName: Carol, gender: 1, isStudent: False, isWorker: True, age: 45, eyeSight: 5.000000, birthdate: 1940-06-22, registerTime: 1911-08-20 02:32:21, lastJobDuration: 48:24:11, workedHours: [4,5], usedNames: [Carmen,Fred], courseScoresPerTerm: [[8,10]], grades: [91,75,21,95], height: 1.000000, u: a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a13},{_ID: 0:3, _LABEL: person, ID: 5, fName: Dan, gender: 2, isStudent: False, isWorker: True, age: 20, eyeSight: 4.800000, birthdate: 1950-07-23, registerTime: 2031-11-30 12:25:30, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [1,9], usedNames: [Wolfeschlegelstein,Daniel], courseScoresPerTerm: [[7,4],[8,8],[9]], grades: [76,88,99,89], height: 1.300000, u: a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a14},{_ID: 0:4, _LABEL: person, ID: 7, fName: Elizabeth, gender: 1, isStudent: False, isWorker: True, age: 20, eyeSight: 4.700000, birthdate: 1980-10-26, registerTime: 1976-12-23 11:21:42, lastJobDuration: 48:24:11, workedHours: [2], usedNames: [Ein], courseScoresPerTerm: [[6],[7],[8]], grades: [96,59,65,88], height: 1.463000, u: a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a15},{_ID: 0:5, _LABEL: person, ID: 8, fName: Farooq, gender: 2, isStudent: 
True, isWorker: False, age: 25, eyeSight: 4.500000, birthdate: 1980-10-26, registerTime: 1972-07-31 13:22:30.678559, lastJobDuration: 00:18:00.024, workedHours: [3,4,5,6,7], usedNames: [Fesdwe], courseScoresPerTerm: [[8]], grades: [80,78,34,83], height: 1.510000, u: a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a16},{_ID: 0:6, _LABEL: person, ID: 9, fName: Greg, gender: 2, isStudent: False, isWorker: False, age: 40, eyeSight: 4.900000, birthdate: 1980-10-26, registerTime: 1976-12-23 04:41:42, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [1], usedNames: [Grad], courseScoresPerTerm: [[10]], grades: [43,83,67,43], height: 1.600000, u: a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a17},{_ID: 0:7, _LABEL: person, ID: 10, fName: Hubert Blaine Wolfeschlegelsteinhausenbergerdorff, gender: 2, isStudent: False, isWorker: True, age: 83, eyeSight: 4.900000, birthdate: 1990-11-27, registerTime: 2023-02-21 13:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,11,12,3,4,5,6,7], usedNames: [Ad,De,Hi,Kye,Orlan], courseScoresPerTerm: [[7],[10],[6,7]], grades: [77,64,100,54], height: 1.323000, u: a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a18}]
Loading