From 3e3846e0703ad37834890a0873ba12fd928312f8 Mon Sep 17 00:00:00 2001
From: Manh Dinh
Date: Thu, 4 Apr 2024 17:39:35 -0400
Subject: [PATCH] Fix test and change HASH type to UINT64

---
 src/function/vector_hash_functions.cpp                    | 8 ++++----
 src/include/common/types/types.h                          | 2 +-
 src/processor/operator/aggregate/aggregate_hash_table.cpp | 4 ++--
 src/processor/operator/hash_join/hash_join_probe.cpp      | 2 +-
 src/processor/result/base_hash_table.cpp                  | 4 ++--
 test/test_files/function/hash/hash.test                   | 6 +++---
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/function/vector_hash_functions.cpp b/src/function/vector_hash_functions.cpp
index 70d41c44be1..fdc9c15976d 100644
--- a/src/function/vector_hash_functions.cpp
+++ b/src/function/vector_hash_functions.cpp
@@ -115,7 +115,7 @@ static void computeStructVecHash(ValueVector* operand, ValueVector* result) {
     case LogicalTypeID::STRUCT: {
         VectorHashFunction::computeHash(
             StructVector::getFieldVector(operand, 0 /* idx */).get(), result);
-        auto tmpHashVector = std::make_unique<ValueVector>(LogicalTypeID::INT64);
+        auto tmpHashVector = std::make_unique<ValueVector>(LogicalTypeID::UINT64);
         for (auto i = 1u; i < StructType::getNumFields(&operand->dataType); i++) {
             auto fieldVector = StructVector::getFieldVector(operand, i);
             VectorHashFunction::computeHash(fieldVector.get(), tmpHashVector.get());
@@ -129,7 +129,7 @@ static void computeStructVecHash(ValueVector* operand, ValueVector* result) {
 
 void VectorHashFunction::computeHash(ValueVector* operand, ValueVector* result) {
     result->state = operand->state;
-    KU_ASSERT(result->dataType.getLogicalTypeID() == LogicalTypeID::INT64);
+    KU_ASSERT(result->dataType.getLogicalTypeID() == LogicalTypeID::UINT64);
     switch (operand->dataType.getPhysicalType()) {
     case PhysicalTypeID::INTERNAL_ID: {
         UnaryHashFunctionExecutor::execute<internalID_t, hash_t>(*operand, *result);
@@ -191,7 +191,7 @@ void VectorHashFunction::computeHash(ValueVector* operand, ValueVector* result)
 }
 
 void VectorHashFunction::combineHash(ValueVector* left, ValueVector* right, ValueVector* result) {
-    KU_ASSERT(left->dataType.getLogicalTypeID() == LogicalTypeID::INT64);
+    KU_ASSERT(left->dataType.getLogicalTypeID() == LogicalTypeID::UINT64);
     KU_ASSERT(left->dataType.getLogicalTypeID() == right->dataType.getLogicalTypeID());
     KU_ASSERT(left->dataType.getLogicalTypeID() == result->dataType.getLogicalTypeID());
     // TODO(Xiyang/Guodong): we should resolve result state of hash vector at compile time.
@@ -227,7 +227,7 @@ function_set HashFunction::getFunctionSet() {
     function_set functionSet;
     functionSet.push_back(
         std::make_unique<ScalarFunction>(name, std::vector<LogicalTypeID>{LogicalTypeID::ANY},
-            LogicalTypeID::INT64, HashExecFunc, false /* isVarLength */));
+            LogicalTypeID::UINT64, HashExecFunc, false /* isVarLength */));
     return functionSet;
 }
 
diff --git a/src/include/common/types/types.h b/src/include/common/types/types.h
index 528e28ba474..4ba08692fa3 100644
--- a/src/include/common/types/types.h
+++ b/src/include/common/types/types.h
@@ -318,7 +318,7 @@ class LogicalType {
         return std::make_unique<LogicalType>(LogicalTypeID::BOOL);
     }
     static std::unique_ptr<LogicalType> HASH() {
-        return std::make_unique<LogicalType>(LogicalTypeID::INT64);
+        return std::make_unique<LogicalType>(LogicalTypeID::UINT64);
     }
     static std::unique_ptr<LogicalType> INT64() {
         return std::make_unique<LogicalType>(LogicalTypeID::INT64);
diff --git a/src/processor/operator/aggregate/aggregate_hash_table.cpp b/src/processor/operator/aggregate/aggregate_hash_table.cpp
index 1b29edc6e4a..1ed4dbd9b49 100644
--- a/src/processor/operator/aggregate/aggregate_hash_table.cpp
+++ b/src/processor/operator/aggregate/aggregate_hash_table.cpp
@@ -47,9 +47,9 @@ bool AggregateHashTable::isAggregateValueDistinctForGroupByKeys(
     VectorHashFunction::computeHash(groupByFlatKeyVectors[0], hashVector.get());
     computeAndCombineVecHash(groupByFlatKeyVectors, 1 /* startVecIdx */);
     auto tmpHashResultVector =
-        std::make_unique<ValueVector>(LogicalTypeID::INT64, &memoryManager);
+        std::make_unique<ValueVector>(LogicalTypeID::UINT64, &memoryManager);
     auto tmpHashCombineResultVector =
-        std::make_unique<ValueVector>(LogicalTypeID::INT64, &memoryManager);
+        std::make_unique<ValueVector>(LogicalTypeID::UINT64, &memoryManager);
     VectorHashFunction::computeHash(aggregateVector, tmpHashResultVector.get());
     VectorHashFunction::combineHash(
         hashVector.get(), tmpHashResultVector.get(), tmpHashCombineResultVector.get());
diff --git a/src/processor/operator/hash_join/hash_join_probe.cpp b/src/processor/operator/hash_join/hash_join_probe.cpp
index 7e8c4f5dcca..0443b62b9d4 100644
--- a/src/processor/operator/hash_join/hash_join_probe.cpp
+++ b/src/processor/operator/hash_join/hash_join_probe.cpp
@@ -27,7 +27,7 @@ void HashJoinProbe::initLocalStateInternal(ResultSet* resultSet, ExecutionContex
         LogicalTypeID::INT64, context->clientContext->getMemoryManager());
     if (keyVectors.size() > 1) {
         tmpHashVector = std::make_unique<ValueVector>(
-            LogicalTypeID::INT64, context->clientContext->getMemoryManager());
+            LogicalTypeID::UINT64, context->clientContext->getMemoryManager());
     }
 }
 
diff --git a/src/processor/result/base_hash_table.cpp b/src/processor/result/base_hash_table.cpp
index 2283c23c37b..3e2ef7840e0 100644
--- a/src/processor/result/base_hash_table.cpp
+++ b/src/processor/result/base_hash_table.cpp
@@ -31,9 +31,9 @@ void BaseHashTable::computeAndCombineVecHash(
     for (; startVecIdx < unFlatKeyVectors.size(); startVecIdx++) {
         auto keyVector = unFlatKeyVectors[startVecIdx];
         auto tmpHashResultVector =
-            std::make_unique<ValueVector>(LogicalTypeID::INT64, &memoryManager);
+            std::make_unique<ValueVector>(LogicalTypeID::UINT64, &memoryManager);
         auto tmpHashCombineResultVector =
-            std::make_unique<ValueVector>(LogicalTypeID::INT64, &memoryManager);
+            std::make_unique<ValueVector>(LogicalTypeID::UINT64, &memoryManager);
         VectorHashFunction::computeHash(keyVector, tmpHashResultVector.get());
         VectorHashFunction::combineHash(
             hashVector.get(), tmpHashResultVector.get(), tmpHashCombineResultVector.get());
diff --git a/test/test_files/function/hash/hash.test b/test/test_files/function/hash/hash.test
index 507af074d93..25cce4e1d03 100644
--- a/test/test_files/function/hash/hash.test
+++ b/test/test_files/function/hash/hash.test
@@ -31,17 +31,17 @@
 # String
 -STATEMENT RETURN hash('hello');
 ---- 1
--2347141879292577852
+16099602194416973764
 
 # Interval
 -STATEMENT RETURN hash(interval("1 years 2 months 3 hours"));
 ---- 1
--5050076354121952078
+13396667719587599538
 
 # Struct
 -STATEMENT RETURN hash({name: 'Alice', age: 20});
 ---- 1
--4980099835463142906
+13466644238246408710
 
 # List
 -STATEMENT RETURN hash(list_creation(1,2,3,4,5,56,2));