diff --git a/src/binder/bind_expression/bind_function_expression.cpp b/src/binder/bind_expression/bind_function_expression.cpp index 83f531b79a..ca0e2f0782 100644 --- a/src/binder/bind_expression/bind_function_expression.cpp +++ b/src/binder/bind_expression/bind_function_expression.cpp @@ -85,8 +85,14 @@ std::shared_ptr ExpressionBinder::bindAggregateFunctionExpression( if (children.empty()) { uniqueExpressionName = binder->getUniqueExpressionName(uniqueExpressionName); } - return make_shared(DataType(function->returnTypeID), - std::move(children), function->aggregateFunction->clone(), uniqueExpressionName); + DataType returnType; + if (function->bindFunc) { + function->bindFunc(childrenTypes, function, returnType); + } else { + returnType = DataType(function->returnTypeID); + } + return make_shared(returnType, std::move(children), + function->aggregateFunction->clone(), uniqueExpressionName); } std::shared_ptr ExpressionBinder::staticEvaluate(const std::string& functionName, diff --git a/src/function/aggregate_function.cpp b/src/function/aggregate_function.cpp index e9e716902f..dcee4cb6ae 100644 --- a/src/function/aggregate_function.cpp +++ b/src/function/aggregate_function.cpp @@ -1,6 +1,7 @@ #include "function/aggregate/aggregate_function.h" #include "function/aggregate/avg.h" +#include "function/aggregate/collect.h" #include "function/aggregate/count.h" #include "function/aggregate/count_star.h" #include "function/aggregate/min_max.h" @@ -69,6 +70,13 @@ std::unique_ptr AggregateFunctionUtil::getMaxFunction( return getMinMaxFunction(inputType, isDistinct); } +std::unique_ptr AggregateFunctionUtil::getCollectFunction( + const common::DataType& inputType, bool isDistinct) { + return std::make_unique(CollectFunction::initialize, + CollectFunction::updateAll, CollectFunction::updatePos, CollectFunction::combine, + CollectFunction::finalize, inputType, isDistinct); +} + template std::unique_ptr AggregateFunctionUtil::getMinMaxFunction( const DataType& inputType, bool isDistinct) { diff --git a/src/function/built_in_aggregate_functions.cpp b/src/function/built_in_aggregate_functions.cpp index d589c012b5..681393ee03 100644 --- a/src/function/built_in_aggregate_functions.cpp +++ b/src/function/built_in_aggregate_functions.cpp @@ -1,5 +1,7 @@ #include "function/aggregate/built_in_aggregate_functions.h" +#include "function/aggregate/collect.h" + using namespace kuzu::common; namespace kuzu { @@ -35,10 +37,10 @@ uint32_t BuiltInAggregateFunctions::getFunctionCost(const std::vector& isDistinct != function->isDistinct) { return UINT32_MAX; } - // Currently all aggregate functions takes either 0 or 1 parameter. Therefore we do not allow - // any implicit cast and require a perfect match. for (auto i = 0u; i < inputTypes.size(); ++i) { - if (inputTypes[i].typeID != function->parameterTypeIDs[i]) { + if (function->parameterTypeIDs[i] == ANY) { + continue; + } else if (inputTypes[i].typeID != function->parameterTypeIDs[i]) { return UINT32_MAX; } } @@ -70,13 +72,14 @@ void BuiltInAggregateFunctions::registerAggregateFunctions() { registerAvg(); registerMin(); registerMax(); + registerCollect(); } void BuiltInAggregateFunctions::registerCountStar() { std::vector> definitions; definitions.push_back(std::make_unique(COUNT_STAR_FUNC_NAME, std::vector{}, INT64, AggregateFunctionUtil::getCountStarFunction(), false)); - aggregateFunctions.insert({COUNT_STAR_FUNC_NAME, move(definitions)}); + aggregateFunctions.insert({COUNT_STAR_FUNC_NAME, std::move(definitions)}); } void BuiltInAggregateFunctions::registerCount() { @@ -90,7 +93,7 @@ void BuiltInAggregateFunctions::registerCount() { AggregateFunctionUtil::getCountFunction(inputType, isDistinct), isDistinct)); } } - aggregateFunctions.insert({COUNT_FUNC_NAME, move(definitions)}); + aggregateFunctions.insert({COUNT_FUNC_NAME, std::move(definitions)}); } void BuiltInAggregateFunctions::registerSum() { @@ -102,7 +105,7 @@ void BuiltInAggregateFunctions::registerSum() { AggregateFunctionUtil::getSumFunction(DataType(typeID), isDistinct), isDistinct)); } } - aggregateFunctions.insert({SUM_FUNC_NAME, move(definitions)}); + aggregateFunctions.insert({SUM_FUNC_NAME, std::move(definitions)}); } void BuiltInAggregateFunctions::registerAvg() { @@ -114,7 +117,7 @@ void BuiltInAggregateFunctions::registerAvg() { AggregateFunctionUtil::getAvgFunction(DataType(typeID), isDistinct), isDistinct)); } } - aggregateFunctions.insert({AVG_FUNC_NAME, move(definitions)}); + aggregateFunctions.insert({AVG_FUNC_NAME, std::move(definitions)}); } void BuiltInAggregateFunctions::registerMin() { @@ -126,7 +129,7 @@ void BuiltInAggregateFunctions::registerMin() { AggregateFunctionUtil::getMinFunction(DataType(typeID), isDistinct), isDistinct)); } } - aggregateFunctions.insert({MIN_FUNC_NAME, move(definitions)}); + aggregateFunctions.insert({MIN_FUNC_NAME, std::move(definitions)}); } void BuiltInAggregateFunctions::registerMax() { @@ -138,7 +141,18 @@ void BuiltInAggregateFunctions::registerMax() { AggregateFunctionUtil::getMaxFunction(DataType(typeID), isDistinct), isDistinct)); } } - aggregateFunctions.insert({MAX_FUNC_NAME, move(definitions)}); + aggregateFunctions.insert({MAX_FUNC_NAME, std::move(definitions)}); +} + +void BuiltInAggregateFunctions::registerCollect() { + std::vector> definitions; + for (auto isDistinct : std::vector{true, false}) { + definitions.push_back(std::make_unique(COLLECT_FUNC_NAME, + std::vector{ANY}, LIST, + AggregateFunctionUtil::getCollectFunction(DataType(ANY), isDistinct), isDistinct, + CollectFunction::bindFunc)); + } + aggregateFunctions.insert({COLLECT_FUNC_NAME, std::move(definitions)}); } } // namespace function diff --git a/src/function/vector_list_operation.cpp b/src/function/vector_list_operation.cpp index b521899ee3..e0be1110ed 100644 --- a/src/function/vector_list_operation.cpp +++ b/src/function/vector_list_operation.cpp @@ -47,7 +47,7 @@ void VectorListOperations::ListCreation( } void ListCreationVectorOperation::listCreationBindFunc(const std::vector& argumentTypes, - VectorOperationDefinition* definition, DataType& actualReturnType) { + FunctionDefinition* definition, DataType& actualReturnType) { if (argumentTypes.empty()) { throw BinderException( "Cannot resolve child data type for " + LIST_CREATION_FUNC_NAME + "."); @@ -80,40 +80,41 @@ std::vector> ListLenVectorOperation:: } void ListExtractVectorOperation::listExtractBindFunc(const std::vector& argumentTypes, - VectorOperationDefinition* definition, DataType& returnType) { + FunctionDefinition* definition, DataType& returnType) { definition->returnTypeID = argumentTypes[0].childType->typeID; + auto vectorOperationDefinition = reinterpret_cast(definition); returnType = *argumentTypes[0].childType; switch (definition->returnTypeID) { case BOOL: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case INT64: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case DOUBLE: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case DATE: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case TIMESTAMP: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case INTERVAL: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case STRING: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case LIST: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; default: { @@ -139,8 +140,8 @@ std::vector> ListConcatVectorOperation::getDefinitions() { std::vector> result; auto execFunc = BinaryListExecFunction; - auto bindFunc = [](const std::vector& argumentTypes, - VectorOperationDefinition* definition, DataType& actualReturnType) { + auto bindFunc = [](const std::vector& argumentTypes, FunctionDefinition* definition, + DataType& actualReturnType) { if (argumentTypes[0] != argumentTypes[1]) { throw BinderException(getListFunctionIncompatibleChildrenTypeErrorMsg( LIST_CONCAT_FUNC_NAME, argumentTypes[0], argumentTypes[1])); @@ -155,44 +156,45 @@ ListConcatVectorOperation::getDefinitions() { } void ListAppendVectorOperation::listAppendBindFunc(const std::vector& argumentTypes, - VectorOperationDefinition* definition, DataType& returnType) { + FunctionDefinition* definition, DataType& returnType) { if (*argumentTypes[0].childType != argumentTypes[1]) { throw BinderException(getListFunctionIncompatibleChildrenTypeErrorMsg( LIST_APPEND_FUNC_NAME, argumentTypes[0], argumentTypes[1])); } + auto vectorOperationDefinition = reinterpret_cast(definition); definition->returnTypeID = argumentTypes[0].typeID; returnType = argumentTypes[0]; switch (argumentTypes[1].typeID) { case INT64: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case DOUBLE: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case BOOL: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case STRING: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case DATE: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case TIMESTAMP: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case INTERVAL: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case LIST: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; default: { @@ -211,44 +213,45 @@ ListAppendVectorOperation::getDefinitions() { } void ListPrependVectorOperation::listPrependBindFunc(const std::vector& argumentTypes, - VectorOperationDefinition* definition, DataType& returnType) { + FunctionDefinition* definition, DataType& returnType) { if (argumentTypes[0] != *argumentTypes[1].childType) { throw BinderException(getListFunctionIncompatibleChildrenTypeErrorMsg( LIST_APPEND_FUNC_NAME, argumentTypes[0], argumentTypes[1])); } + auto vectorOperationDefinition = reinterpret_cast(definition); definition->returnTypeID = argumentTypes[1].typeID; returnType = argumentTypes[1]; switch (argumentTypes[0].typeID) { case INT64: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case DOUBLE: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case BOOL: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case STRING: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case DATE: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case TIMESTAMP: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case INTERVAL: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; case LIST: { - definition->execFunc = + vectorOperationDefinition->execFunc = BinaryListExecFunction; } break; default: { @@ -280,8 +283,8 @@ ListContainsVectorOperation::getDefinitions() { std::vector> ListSliceVectorOperation::getDefinitions() { std::vector> result; - auto bindFunc = [](const std::vector& argumentTypes, - VectorOperationDefinition* definition, DataType& actualReturnType) { + auto bindFunc = [](const std::vector& argumentTypes, FunctionDefinition* definition, + DataType& actualReturnType) { definition->returnTypeID = argumentTypes[0].typeID; actualReturnType = argumentTypes[0]; }; diff --git a/src/include/common/expression_type.h b/src/include/common/expression_type.h index 7c5acff15a..558b4924b8 100644 --- a/src/include/common/expression_type.h +++ b/src/include/common/expression_type.h @@ -17,6 +17,7 @@ const std::string SUM_FUNC_NAME = "SUM"; const std::string AVG_FUNC_NAME = "AVG"; const std::string MIN_FUNC_NAME = "MIN"; const std::string MAX_FUNC_NAME = "MAX"; +const std::string COLLECT_FUNC_NAME = "COLLECT"; // cast const std::string CAST_TO_DATE_FUNC_NAME = "DATE"; diff --git a/src/include/function/aggregate/aggregate_function.h b/src/include/function/aggregate/aggregate_function.h index a1c4820ac2..e7b822f45d 100644 --- a/src/include/function/aggregate/aggregate_function.h +++ b/src/include/function/aggregate/aggregate_function.h @@ -5,6 +5,7 @@ #include "common/vector/value_vector.h" #include "function/function_definition.h" +#include "function/vector_operations.h" namespace kuzu { namespace function { @@ -19,23 +20,30 @@ struct AggregateFunctionDefinition : public FunctionDefinition { : FunctionDefinition{std::move(name), std::move(parameterTypeIDs), returnTypeID}, aggregateFunction{std::move(aggregateFunction)}, isDistinct{isDistinct} {} + AggregateFunctionDefinition(std::string name, std::vector parameterTypeIDs, + common::DataTypeID returnTypeID, std::unique_ptr aggregateFunction, + bool isDistinct, scalar_bind_func bindFunc) + : FunctionDefinition{std::move(name), std::move(parameterTypeIDs), returnTypeID, + std::move(bindFunc)}, + aggregateFunction{std::move(aggregateFunction)}, isDistinct{isDistinct} {} + std::unique_ptr aggregateFunction; bool isDistinct; }; struct AggregateState { virtual inline uint32_t getStateSize() const = 0; - virtual uint8_t* getResult() const = 0; + virtual void moveResultToVector(common::ValueVector* outputVector, uint64_t pos) = 0; virtual ~AggregateState() = default; bool isNull = true; }; using aggr_initialize_function_t = std::function()>; -using aggr_update_all_function_t = - std::function; -using aggr_update_pos_function_t = std::function; +using aggr_update_all_function_t = std::function; +using aggr_update_pos_function_t = std::function; using aggr_combine_function_t = std::function; using aggr_finalize_function_t = std::function; @@ -63,13 +71,14 @@ class AggregateFunction { return initializeFunc(); } - inline void updateAllState(uint8_t* state, common::ValueVector* input, uint64_t multiplicity) { - return updateAllFunc(state, input, multiplicity); + inline void updateAllState(uint8_t* state, common::ValueVector* input, uint64_t multiplicity, + storage::MemoryManager* memoryManager) { + return updateAllFunc(state, input, multiplicity, memoryManager); } - inline void updatePosState( - uint8_t* state, common::ValueVector* input, uint64_t multiplicity, uint32_t pos) { - return updatePosFunc(state, input, multiplicity, pos); + inline void updatePosState(uint8_t* state, common::ValueVector* input, uint64_t multiplicity, + uint32_t pos, storage::MemoryManager* memoryManager) { + return updatePosFunc(state, input, multiplicity, pos, memoryManager); } inline void combineState(uint8_t* state, uint8_t* otherState) { @@ -80,6 +89,8 @@ class AggregateFunction { inline common::DataType getInputDataType() const { return inputDataType; } + inline void setInputDataType(common::DataType dataType) { inputDataType = std::move(dataType); } + inline bool isFunctionDistinct() const { return isDistinct; } std::unique_ptr clone() { @@ -114,6 +125,8 @@ class AggregateFunctionUtil { const common::DataType& inputType, bool isDistinct); static std::unique_ptr getMaxFunction( const common::DataType& inputType, bool isDistinct); + static std::unique_ptr getCollectFunction( + const common::DataType& inputType, bool isDistinct); private: template diff --git a/src/include/function/aggregate/avg.h b/src/include/function/aggregate/avg.h index 981087cccf..d899b19187 100644 --- a/src/include/function/aggregate/avg.h +++ b/src/include/function/aggregate/avg.h @@ -11,7 +11,10 @@ struct AvgFunction { struct AvgState : public AggregateState { inline uint32_t getStateSize() const override { return sizeof(*this); } - inline uint8_t* getResult() const override { return (uint8_t*)&avg; } + inline void moveResultToVector(common::ValueVector* outputVector, uint64_t pos) override { + memcpy(outputVector->getData() + pos * outputVector->getNumBytesPerValue(), + reinterpret_cast(&avg), outputVector->getNumBytesPerValue()); + } T sum; uint64_t count = 0; @@ -20,7 +23,8 @@ struct AvgFunction { static std::unique_ptr initialize() { return std::make_unique(); } - static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity) { + static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + storage::MemoryManager* memoryManager) { auto state = reinterpret_cast(state_); assert(!input->state->isFlat()); if (input->hasNoNullsGuarantee()) { @@ -38,8 +42,8 @@ struct AvgFunction { } } - static inline void updatePos( - uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, uint32_t pos) { + static inline void updatePos(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + uint32_t pos, storage::MemoryManager* memoryManager) { updateSingleValue(reinterpret_cast(state_), input, pos, multiplicity); } diff --git a/src/include/function/aggregate/base_count.h b/src/include/function/aggregate/base_count.h index 11ec053c10..c7877ac5bf 100644 --- a/src/include/function/aggregate/base_count.h +++ b/src/include/function/aggregate/base_count.h @@ -9,7 +9,10 @@ struct BaseCountFunction { struct CountState : public AggregateState { inline uint32_t getStateSize() const override { return sizeof(*this); } - inline uint8_t* getResult() const override { return (uint8_t*)&count; } + inline void moveResultToVector(common::ValueVector* outputVector, uint64_t pos) override { + memcpy(outputVector->getData() + pos * outputVector->getNumBytesPerValue(), + reinterpret_cast(&count), outputVector->getNumBytesPerValue()); + } uint64_t count = 0; }; diff --git a/src/include/function/aggregate/built_in_aggregate_functions.h b/src/include/function/aggregate/built_in_aggregate_functions.h index 2497203c23..04dfe9f990 100644 --- a/src/include/function/aggregate/built_in_aggregate_functions.h +++ b/src/include/function/aggregate/built_in_aggregate_functions.h @@ -34,6 +34,7 @@ class BuiltInAggregateFunctions { void registerAvg(); void registerMin(); void registerMax(); + void registerCollect(); private: std::unordered_map>> diff --git a/src/include/function/aggregate/collect.h b/src/include/function/aggregate/collect.h new file mode 100644 index 0000000000..715fe0e14a --- /dev/null +++ b/src/include/function/aggregate/collect.h @@ -0,0 +1,135 @@ +#pragma once + +#include "common/in_mem_overflow_buffer_utils.h" +#include "common/vector/value_vector_utils.h" +#include "processor/result/factorized_table.h" + +namespace kuzu { +namespace function { + +struct CollectFunction { + + struct CollectState : public AggregateState { + CollectState() : factorizedTable{nullptr} {} + inline uint32_t getStateSize() const override { return sizeof(*this); } + void moveResultToVector(common::ValueVector* outputVector, uint64_t pos) override { + auto dstKUList = outputVector->getValue(pos); + auto numBytesPerElement = + factorizedTable->getTableSchema()->getColumn(0)->getNumBytes(); + dstKUList.size = factorizedTable->getNumTuples(); + dstKUList.overflowPtr = + reinterpret_cast(outputVector->getOverflowBuffer().allocateSpace( + factorizedTable->getNumTuples() * numBytesPerElement)); + outputVector->setValue(pos, dstKUList); + switch (outputVector->dataType.childType->typeID) { + case common::STRING: { + for (auto i = 0u; i < dstKUList.size; i++) { + common::InMemOverflowBufferUtils::copyString( + *reinterpret_cast(factorizedTable->getTuple(i)), + reinterpret_cast(dstKUList.overflowPtr)[i], + outputVector->getOverflowBuffer()); + } + } break; + case common::LIST: { + for (auto i = 0u; i < dstKUList.size; i++) { + common::InMemOverflowBufferUtils::copyListRecursiveIfNested( + *reinterpret_cast(factorizedTable->getTuple(i)), + reinterpret_cast(dstKUList.overflowPtr)[i], + *outputVector->dataType.childType, outputVector->getOverflowBuffer()); + } + } break; + default: { + for (auto i = 0u; i < dstKUList.size; i++) { + memcpy( + reinterpret_cast(dstKUList.overflowPtr) + i * numBytesPerElement, + factorizedTable->getTuple(i), numBytesPerElement); + } + } + } + // CollectStates are stored in factorizedTable entries. When the factorizedTable is + // destructed, the destructor of CollectStates won't be called. Therefore, we need to + // manually deallocate the memory of CollectStates. + factorizedTable.reset(); + } + + std::unique_ptr factorizedTable; + }; + + static std::unique_ptr initialize() { return std::make_unique(); } + + static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + storage::MemoryManager* memoryManager) { + assert(!input->state->isFlat()); + auto state = reinterpret_cast(state_); + if (input->hasNoNullsGuarantee()) { + for (auto i = 0u; i < input->state->selVector->selectedSize; ++i) { + auto pos = input->state->selVector->selectedPositions[i]; + updateSingleValue(state, input, pos, multiplicity, memoryManager); + } + } else { + for (auto i = 0u; i < input->state->selVector->selectedSize; ++i) { + auto pos = input->state->selVector->selectedPositions[i]; + if (!input->isNull(pos)) { + updateSingleValue(state, input, pos, multiplicity, memoryManager); + } + } + } + } + + static inline void updatePos(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + uint32_t pos, storage::MemoryManager* memoryManager) { + auto state = reinterpret_cast(state_); + updateSingleValue(state, input, pos, multiplicity, memoryManager); + } + + static void initCollectStateIfNecessary( + CollectState* state, storage::MemoryManager* memoryManager, common::DataType& dataType) { + if (state->factorizedTable == nullptr) { + auto tableSchema = std::make_unique(); + tableSchema->appendColumn( + std::make_unique(false /* isUnflat */, + 0 /* dataChunkPos */, common::Types::getDataTypeSize(dataType))); + state->factorizedTable = + std::make_unique(memoryManager, std::move(tableSchema)); + } + } + + static void updateSingleValue(CollectState* state, common::ValueVector* input, uint32_t pos, + uint64_t multiplicity, storage::MemoryManager* memoryManager) { + initCollectStateIfNecessary(state, memoryManager, input->dataType); + for (auto i = 0u; i < multiplicity; ++i) { + auto tuple = state->factorizedTable->appendEmptyTuple(); + state->isNull = false; + common::ValueVectorUtils::copyNonNullDataWithSameTypeOutFromPos( + *input, pos, tuple, *state->factorizedTable->getInMemOverflowBuffer()); + } + } + + static void combine(uint8_t* state_, uint8_t* otherState_) { + auto otherState = reinterpret_cast(otherState_); + if (otherState->isNull) { + return; + } + auto state = reinterpret_cast(state_); + if (state->isNull) { + state->factorizedTable = std::move(otherState->factorizedTable); + state->isNull = false; + } else { + state->factorizedTable->merge(*otherState->factorizedTable); + } + } + + static void finalize(uint8_t* state_) {} + + static void bindFunc(const std::vector& argumentTypes, + FunctionDefinition* definition, common::DataType& returnType) { + assert(argumentTypes.size() == 1); + auto aggFuncDefinition = reinterpret_cast(definition); + aggFuncDefinition->aggregateFunction->setInputDataType(argumentTypes[0]); + returnType = + common::DataType(common::LIST, std::make_unique(argumentTypes[0])); + } +}; + +} // namespace function +} // namespace kuzu diff --git a/src/include/function/aggregate/count.h b/src/include/function/aggregate/count.h index 3adeba9145..e69a4e7728 100644 --- a/src/include/function/aggregate/count.h +++ b/src/include/function/aggregate/count.h @@ -7,7 +7,8 @@ namespace function { struct CountFunction : public BaseCountFunction { - static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity) { + static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + storage::MemoryManager* memoryManager) { auto state = reinterpret_cast(state_); if (input->hasNoNullsGuarantee()) { for (auto i = 0u; i < input->state->selVector->selectedSize; ++i) { @@ -23,8 +24,8 @@ struct CountFunction : public BaseCountFunction { } } - static inline void updatePos( - uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, uint32_t pos) { + static inline void updatePos(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + uint32_t pos, storage::MemoryManager* memoryManager) { reinterpret_cast(state_)->count += multiplicity; } }; diff --git a/src/include/function/aggregate/count_star.h b/src/include/function/aggregate/count_star.h index d56457f8e3..8299f094ba 100644 --- a/src/include/function/aggregate/count_star.h +++ b/src/include/function/aggregate/count_star.h @@ -7,14 +7,15 @@ namespace function { struct CountStarFunction : public BaseCountFunction { - static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity) { + static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + storage::MemoryManager* memoryManager) { auto state = reinterpret_cast(state_); assert(input == nullptr); state->count += multiplicity; } - static void updatePos( - uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, uint32_t pos) { + static void updatePos(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + uint32_t pos, storage::MemoryManager* memoryManager) { auto state = reinterpret_cast(state_); assert(input == nullptr); state->count += multiplicity; diff --git a/src/include/function/aggregate/min_max.h b/src/include/function/aggregate/min_max.h index e681ad5b48..c5299f6ac0 100644 --- a/src/include/function/aggregate/min_max.h +++ b/src/include/function/aggregate/min_max.h @@ -11,7 +11,10 @@ struct MinMaxFunction { struct MinMaxState : public AggregateState { inline uint32_t getStateSize() const override { return sizeof(*this); } - inline uint8_t* getResult() const override { return (uint8_t*)&val; } + inline void moveResultToVector(common::ValueVector* outputVector, uint64_t pos) override { + memcpy(outputVector->getData() + pos * outputVector->getNumBytesPerValue(), + reinterpret_cast(&val), outputVector->getNumBytesPerValue()); + } T val; }; @@ -19,7 +22,8 @@ struct MinMaxFunction { static std::unique_ptr initialize() { return std::make_unique(); } template - static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity) { + static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + storage::MemoryManager* memoryManager) { assert(!input->state->isFlat()); auto state = reinterpret_cast(state_); if (input->hasNoNullsGuarantee()) { @@ -38,8 +42,8 @@ struct MinMaxFunction { } template - static inline void updatePos( - uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, uint32_t pos) { + static inline void updatePos(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + uint32_t pos, storage::MemoryManager* memoryManager) { updateSingleValue(reinterpret_cast(state_), input, pos); } diff --git a/src/include/function/aggregate/sum.h b/src/include/function/aggregate/sum.h index 7f65f49a01..4c12d26ddc 100644 --- a/src/include/function/aggregate/sum.h +++ b/src/include/function/aggregate/sum.h @@ -11,14 +11,18 @@ struct SumFunction { struct SumState : public AggregateState { inline uint32_t getStateSize() const override { return sizeof(*this); } - inline uint8_t* getResult() const override { return (uint8_t*)∑ } + inline void moveResultToVector(common::ValueVector* outputVector, uint64_t pos) override { + memcpy(outputVector->getData() + pos * outputVector->getNumBytesPerValue(), + reinterpret_cast(&sum), outputVector->getNumBytesPerValue()); + } T sum; }; static std::unique_ptr initialize() { return std::make_unique(); } - static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity) { + static void updateAll(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + storage::MemoryManager* memoryManager) { assert(!input->state->isFlat()); auto state = reinterpret_cast(state_); if (input->hasNoNullsGuarantee()) { @@ -36,8 +40,8 @@ struct SumFunction { } } - static inline void updatePos( - uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, uint32_t pos) { + static inline void updatePos(uint8_t* state_, common::ValueVector* input, uint64_t multiplicity, + uint32_t pos, storage::MemoryManager* memoryManager) { auto state = reinterpret_cast(state_); updateSingleValue(state, input, pos, multiplicity); } diff --git a/src/include/function/function_definition.h b/src/include/function/function_definition.h index 6335dc24ac..a66fc36b4a 100644 --- a/src/include/function/function_definition.h +++ b/src/include/function/function_definition.h @@ -5,12 +5,22 @@ namespace kuzu { namespace function { +// Forward declaration of FunctionDefinition. +struct FunctionDefinition; + +using scalar_bind_func = std::function&, FunctionDefinition*, common::DataType&)>; + struct FunctionDefinition { FunctionDefinition(std::string name, std::vector parameterTypeIDs, common::DataTypeID returnTypeID) : name{std::move(name)}, parameterTypeIDs{std::move(parameterTypeIDs)}, returnTypeID{ returnTypeID} {} + FunctionDefinition(std::string name, std::vector parameterTypeIDs, + common::DataTypeID returnTypeID, scalar_bind_func bindFunc) + : name{std::move(name)}, parameterTypeIDs{std::move(parameterTypeIDs)}, + returnTypeID{returnTypeID}, bindFunc{std::move(bindFunc)} {} inline std::string signatureToString() const { std::string result = common::Types::dataTypesToString(parameterTypeIDs); @@ -21,6 +31,8 @@ struct FunctionDefinition { std::string name; std::vector parameterTypeIDs; common::DataTypeID returnTypeID; + // This function is used to bind parameter/return types for functions with nested dataType. + scalar_bind_func bindFunc; }; } // namespace function diff --git a/src/include/function/hash/hash_operations.h b/src/include/function/hash/hash_operations.h index 0f0410225f..078acef98a 100644 --- a/src/include/function/hash/hash_operations.h +++ b/src/include/function/hash/hash_operations.h @@ -100,6 +100,12 @@ inline void Hash::operation(const common::interval_t& key, common::hash_t& resul combineHashScalar(murmurhash64(key.days), murmurhash64(key.micros))); } +template<> +inline void Hash::operation(const common::ku_list_t& key, common::hash_t& result) { + throw common::RuntimeException( + "Computing hash value of list DataType is currently unsupported."); +} + template<> inline void Hash::operation(const std::unordered_set& key, common::hash_t& result) { for (auto&& s : key) { diff --git a/src/include/function/list/vector_list_operations.h b/src/include/function/list/vector_list_operations.h index 34dc75912d..128f739f25 100644 --- a/src/include/function/list/vector_list_operations.h +++ b/src/include/function/list/vector_list_operations.h @@ -93,7 +93,7 @@ struct VectorListOperations : public VectorOperations { struct ListCreationVectorOperation : public VectorListOperations { static std::vector> getDefinitions(); static void listCreationBindFunc(const std::vector& argumentTypes, - VectorOperationDefinition* definition, common::DataType& actualReturnType); + FunctionDefinition* definition, common::DataType& actualReturnType); }; struct ListLenVectorOperation : public VectorListOperations { @@ -103,7 +103,7 @@ struct ListLenVectorOperation : public VectorListOperations { struct ListExtractVectorOperation : public VectorListOperations { static std::vector> getDefinitions(); static void listExtractBindFunc(const std::vector& argumentTypes, - VectorOperationDefinition* definition, common::DataType& returnType); + FunctionDefinition* definition, common::DataType& returnType); }; struct ListConcatVectorOperation : public VectorListOperations { @@ -112,13 +112,13 @@ struct ListConcatVectorOperation : public VectorListOperations { struct ListAppendVectorOperation : public VectorListOperations { static void listAppendBindFunc(const std::vector& argumentTypes, - VectorOperationDefinition* definition, common::DataType& returnType); + FunctionDefinition* definition, common::DataType& returnType); static std::vector> getDefinitions(); }; struct ListPrependVectorOperation : public VectorListOperations { static void listPrependBindFunc(const std::vector& argumentTypes, - VectorOperationDefinition* definition, common::DataType& returnType); + FunctionDefinition* definition, common::DataType& returnType); static std::vector> getDefinitions(); }; diff --git a/src/include/function/vector_operations.h b/src/include/function/vector_operations.h index a7e559d994..563e38785e 100644 --- a/src/include/function/vector_operations.h +++ b/src/include/function/vector_operations.h @@ -17,8 +17,6 @@ using scalar_exec_func = std::function>&, common::ValueVector&)>; using scalar_select_func = std::function>&, common::SelectionVector&)>; -using scalar_bind_func = std::function&, VectorOperationDefinition*, common::DataType&)>; struct VectorOperationDefinition : public FunctionDefinition { @@ -37,14 +35,13 @@ struct VectorOperationDefinition : public FunctionDefinition { VectorOperationDefinition(std::string name, std::vector parameterTypeIDs, common::DataTypeID returnTypeID, scalar_exec_func execFunc, scalar_select_func selectFunc, scalar_bind_func bindFunc, bool isVarLength = false) - : FunctionDefinition{std::move(name), std::move(parameterTypeIDs), returnTypeID}, + : FunctionDefinition{std::move(name), std::move(parameterTypeIDs), returnTypeID, + std::move(bindFunc)}, execFunc{std::move(execFunc)}, - selectFunc(std::move(selectFunc)), bindFunc{std::move(bindFunc)}, isVarLength{ - isVarLength} {} + selectFunc(std::move(selectFunc)), isVarLength{isVarLength} {} scalar_exec_func execFunc; scalar_select_func selectFunc; - scalar_bind_func bindFunc; // Currently we only one variable-length function which is list creation. The expectation is // that all parameters must have the same type as parameterTypes[0]. bool isVarLength; diff --git a/src/processor/operator/aggregate/aggregate_hash_table.cpp b/src/processor/operator/aggregate/aggregate_hash_table.cpp index c411eb0686..70b4ef683c 100644 --- a/src/processor/operator/aggregate/aggregate_hash_table.cpp +++ b/src/processor/operator/aggregate/aggregate_hash_table.cpp @@ -438,7 +438,7 @@ void AggregateHashTable::updateDistinctAggState( aggregateVector, 1 /* Distinct aggregate should ignore multiplicity since they are known to be non-distinct. */ , - pos); + pos, &memoryManager); } } } @@ -714,12 +714,12 @@ void AggregateHashTable::updateNullAggVectorState( if (groupByUnflatHashKeyVectors.empty()) { auto pos = groupByFlatHashKeyVectors[0]->state->selVector->selectedPositions[0]; aggregateFunction->updatePosState(hashSlotsToUpdateAggState[pos]->entry + aggStateOffset, - nullptr, multiplicity, 0 /* dummy pos */); + nullptr, multiplicity, 0 /* dummy pos */, &memoryManager); } else if (groupByUnflatHashKeyVectors[0]->state->selVector->isUnfiltered()) { auto selectedSize = groupByUnflatHashKeyVectors[0]->state->selVector->selectedSize; for (auto i = 0u; i < selectedSize; i++) { aggregateFunction->updatePosState(hashSlotsToUpdateAggState[i]->entry + aggStateOffset, - nullptr, multiplicity, 0 /* dummy pos */); + nullptr, multiplicity, 0 /* dummy pos */, &memoryManager); } } else { auto selectedSize = groupByUnflatHashKeyVectors[0]->state->selVector->selectedSize; @@ -727,7 +727,7 @@ void AggregateHashTable::updateNullAggVectorState( auto pos = groupByUnflatHashKeyVectors[0]->state->selVector->selectedPositions[i]; aggregateFunction->updatePosState( hashSlotsToUpdateAggState[pos]->entry + aggStateOffset, nullptr, multiplicity, - 0 /* dummy pos */); + 0 /* dummy pos */, &memoryManager); } } } @@ -741,7 +741,7 @@ void AggregateHashTable::updateBothFlatAggVectorState( aggregateFunction->updatePosState( hashSlotsToUpdateAggState[hashVector->state->selVector->selectedPositions[0]]->entry + aggStateOffset, - aggVector, multiplicity, aggPos); + aggVector, multiplicity, aggPos, &memoryManager); } } @@ -757,14 +757,14 @@ void AggregateHashTable::updateFlatUnflatKeyFlatAggVectorState( for (auto i = 0u; i < selectedSize; i++) { aggregateFunction->updatePosState( hashSlotsToUpdateAggState[i]->entry + aggStateOffset, aggVector, multiplicity, - aggPos); + aggPos, &memoryManager); } } else { for (auto i = 0u; i < selectedSize; i++) { auto pos = groupByUnflatHashKeyVectors[0]->state->selVector->selectedPositions[i]; aggregateFunction->updatePosState( hashSlotsToUpdateAggState[pos]->entry + aggStateOffset, aggVector, multiplicity, - aggPos); + aggPos, &memoryManager); } } } @@ -781,13 +781,15 @@ void AggregateHashTable::updateFlatKeyUnflatAggVectorState( for (auto i = 0u; i < aggVecSelectedSize; i++) { aggregateFunction->updatePosState( hashSlotsToUpdateAggState[groupByKeyPos]->entry + aggStateOffset, aggVector, - multiplicity, aggVector->state->selVector->selectedPositions[i]); + multiplicity, aggVector->state->selVector->selectedPositions[i], + &memoryManager); } } else { for (auto i = 0u; i < aggVecSelectedSize; i++) { aggregateFunction->updatePosState( hashSlotsToUpdateAggState[groupByKeyPos]->entry + aggStateOffset, aggVector, - multiplicity, aggVector->state->selVector->selectedPositions[i]); + multiplicity, aggVector->state->selVector->selectedPositions[i], + &memoryManager); } } } else { @@ -796,7 +798,7 @@ void AggregateHashTable::updateFlatKeyUnflatAggVectorState( if (!aggVector->isNull(i)) { aggregateFunction->updatePosState( hashSlotsToUpdateAggState[0]->entry + aggStateOffset, aggVector, - multiplicity, i); + multiplicity, i, &memoryManager); } } } else { @@ -805,7 +807,7 @@ void AggregateHashTable::updateFlatKeyUnflatAggVectorState( if (!aggVector->isNull(pos)) { aggregateFunction->updatePosState( hashSlotsToUpdateAggState[0]->entry + aggStateOffset, aggVector, - multiplicity, pos); + multiplicity, pos, &memoryManager); } } } @@ -822,14 +824,14 @@ void AggregateHashTable::updateBothUnflatSameDCAggVectorState( for (auto i = 0u; i < aggVector->state->selVector->selectedSize; i++) { aggregateFunction->updatePosState( hashSlotsToUpdateAggState[i]->entry + aggStateOffset, aggVector, multiplicity, - i); + i, &memoryManager); } } else { for (auto i = 0u; i < aggVector->state->selVector->selectedSize; i++) { auto pos = aggVector->state->selVector->selectedPositions[i]; aggregateFunction->updatePosState( hashSlotsToUpdateAggState[pos]->entry + aggStateOffset, aggVector, multiplicity, - pos); + pos, &memoryManager); } } } else { @@ -838,7 +840,7 @@ void AggregateHashTable::updateBothUnflatSameDCAggVectorState( if (!aggVector->isNull(i)) { aggregateFunction->updatePosState( hashSlotsToUpdateAggState[i]->entry + aggStateOffset, aggVector, - multiplicity, i); + multiplicity, i, &memoryManager); } } } else { @@ -847,7 +849,7 @@ void AggregateHashTable::updateBothUnflatSameDCAggVectorState( if (!aggVector->isNull(pos)) { aggregateFunction->updatePosState( hashSlotsToUpdateAggState[pos]->entry + aggStateOffset, aggVector, - multiplicity, pos); + multiplicity, pos, &memoryManager); } } } @@ -862,14 +864,15 @@ void AggregateHashTable::updateBothUnflatDifferentDCAggVectorState( auto selectedSize = groupByUnflatHashKeyVectors[0]->state->selVector->selectedSize; if (groupByUnflatHashKeyVectors[0]->state->selVector->isUnfiltered()) { for (auto i = 0u; i < selectedSize; i++) { - aggregateFunction->updateAllState( - hashSlotsToUpdateAggState[i]->entry + aggStateOffset, aggVector, multiplicity); + aggregateFunction->updateAllState(hashSlotsToUpdateAggState[i]->entry + aggStateOffset, + aggVector, multiplicity, &memoryManager); } } else { for (auto i = 0u; i < selectedSize; i++) { auto pos = groupByUnflatHashKeyVectors[0]->state->selVector->selectedPositions[i]; aggregateFunction->updateAllState( - hashSlotsToUpdateAggState[pos]->entry + aggStateOffset, aggVector, multiplicity); + hashSlotsToUpdateAggState[pos]->entry + aggStateOffset, aggVector, multiplicity, + &memoryManager); } } } diff --git a/src/processor/operator/aggregate/base_aggregate_scan.cpp b/src/processor/operator/aggregate/base_aggregate_scan.cpp index f26d832f17..57a2643453 100644 --- a/src/processor/operator/aggregate/base_aggregate_scan.cpp +++ b/src/processor/operator/aggregate/base_aggregate_scan.cpp @@ -18,8 +18,7 @@ void BaseAggregateScan::writeAggregateResultToVector( if (aggregateState->isNull) { vector.setNull(pos, true); } else { - memcpy(vector.getData() + pos * vector.getNumBytesPerValue(), aggregateState->getResult(), - vector.getNumBytesPerValue()); + aggregateState->moveResultToVector(&vector, pos); } } diff --git a/src/processor/operator/aggregate/simple_aggregate.cpp b/src/processor/operator/aggregate/simple_aggregate.cpp index 781caa033e..01f7a5b241 100644 --- a/src/processor/operator/aggregate/simple_aggregate.cpp +++ b/src/processor/operator/aggregate/simple_aggregate.cpp @@ -65,7 +65,8 @@ void SimpleAggregate::executeInternal(ExecutionContext* context) { aggVector, 1 /* Distinct aggregate should ignore multiplicity since they are known to be non-distinct. */ , - aggVector->state->selVector->selectedPositions[0]); + aggVector->state->selVector->selectedPositions[0], + context->memoryManager); } } } else { @@ -73,11 +74,12 @@ void SimpleAggregate::executeInternal(ExecutionContext* context) { if (!aggVector->isNull(aggVector->state->selVector->selectedPositions[0])) { aggregateFunction->updatePosState((uint8_t*)localAggregateStates[i].get(), aggVector, resultSet->multiplicity, - aggVector->state->selVector->selectedPositions[0]); + aggVector->state->selVector->selectedPositions[0], + context->memoryManager); } } else { aggregateFunction->updateAllState((uint8_t*)localAggregateStates[i].get(), - aggVector, resultSet->multiplicity); + aggVector, resultSet->multiplicity, context->memoryManager); } } } diff --git a/test/runner/e2e_ddl_test.cpp b/test/runner/e2e_ddl_test.cpp index e7a4245307..50f7de9011 100644 --- a/test/runner/e2e_ddl_test.cpp +++ b/test/runner/e2e_ddl_test.cpp @@ -479,8 +479,7 @@ class TinySnbDDLTest : public DBTest { validateDatabaseFileAfterCheckpointAddProperty( bwdListOriginalVersionFileName, bwdListWALVersionFileName, hasOverflow); // Note: the default value of the new property is NULL if not specified by the user. - auto result = conn->query(StringUtils::string_format( - "MATCH (:person)-[e:studyAt]->(:organisation) return e.random")); + auto result = conn->query("MATCH (:person)-[e:studyAt]->(:organisation) return e.random"); while (result->hasNext()) { ASSERT_TRUE(result->getNext()->getValue(0 /* idx */)->isNull()); } diff --git a/test/test_files/tinysnb/agg/distinct_agg.test b/test/test_files/tinysnb/agg/distinct_agg.test index fa103bb647..6dbb3a69d9 100644 --- a/test/test_files/tinysnb/agg/distinct_agg.test +++ b/test/test_files/tinysnb/agg/distinct_agg.test @@ -23,3 +23,14 @@ -ENUMERATE ---- 1 5 + +-NAME SimpleDistinctCollectINT64Test +-QUERY MATCH (p:person) RETURN collect(distinct p.age) +---- 1 +[35,30,45,20,25,40,83] + +-NAME HashDistinctCollectDoubleTest +-QUERY MATCH (p:person) RETURN p.gender, collect(distinct p.isStudent) +---- 2 +1|[True,False] +2|[True,False] diff --git a/test/test_files/tinysnb/agg/hash.test b/test/test_files/tinysnb/agg/hash.test index f1242e2051..ef7e16d5e2 100644 --- a/test/test_files/tinysnb/agg/hash.test +++ b/test/test_files/tinysnb/agg/hash.test @@ -101,3 +101,60 @@ Alice|1 3 4 6 + +-NAME HashCollectINT64Test +-QUERY MATCH (p:person) RETURN p.gender, collect(p.age) +---- 2 +1|[35,45,20] +2|[30,20,25,40,83] + +-NAME HashCollectSTRINGTest +-QUERY MATCH (p:person) RETURN p.age, collect(p.fName) +---- 7 +35|[Alice] +30|[Bob] +45|[Carol] +20|[Dan,Elizabeth] +25|[Farooq] +40|[Greg] +83|[Hubert Blaine Wolfeschlegelsteinhausenbergerdorff] + +-NAME HashCollectLISTOfINT64Test +-QUERY MATCH (p:person) RETURN p.gender, collect(p.workedHours) +---- 2 +1|[[10,5],[4,5],[2]] +2|[[12,8],[1,9],[3,4,5,6,7],[1],[10,11,12,3,4,5,6,7]] + +-NAME HashCollectLISTOfSTRINGTest +-QUERY MATCH (p:person) RETURN p.isStudent, collect(p.usedNames) +---- 2 +True|[[Aida],[Bobby],[Fesdwe]] +False|[[Carmen,Fred],[Wolfeschlegelstein,Daniel],[Ein],[Grad],[Ad,De,Hi,Kye,Orlan]] + +-NAME HashCollectLISTOfLISTOfINT64Test +-QUERY MATCH (p:person) RETURN p.ID, collect(p.courseScoresPerTerm) +---- 8 +0|[[[10,8],[6,7,8]]] +2|[[[8,9],[9,10]]] +3|[[[8,10]]] +5|[[[7,4],[8,8],[9]]] +7|[[[6],[7],[8]]] +8|[[[8]]] +9|[[[10]]] +10|[[[7],[10],[6,7]]] + +-NAME HashCollectLISTOfSTRINGMultiThreadTest +-QUERY MATCH (p:person) RETURN p.isStudent, collect(p.usedNames) +-PARALLELISM 4 +---- 2 +True|[[Aida],[Bobby],[Fesdwe]] +False|[[Carmen,Fred],[Wolfeschlegelstein,Daniel],[Ein],[Grad],[Ad,De,Hi,Kye,Orlan]] + +-NAME HashCollectOneHopTest +-QUERY MATCH (a:person)-[:knows]->(b:person) RETURN a.fName, collect(b.age) +---- 5 +Alice|[30,45,20] +Bob|[35,45,20] +Carol|[35,30,20] +Dan|[35,30,45] +Elizabeth|[25,40] diff --git a/test/test_files/tinysnb/agg/simple.test b/test/test_files/tinysnb/agg/simple.test index e83d180fcc..056dcce264 100644 --- a/test/test_files/tinysnb/agg/simple.test +++ b/test/test_files/tinysnb/agg/simple.test @@ -55,3 +55,34 @@ False -ENUMERATE ---- 1 455|0|4.935714 + +-NAME SimpleCollectINT64Test +-QUERY MATCH (p:person) RETURN collect(p.age) +---- 1 +[35,30,45,20,20,25,40,83] + +-NAME SimpleCollectSTRINGTest +-QUERY MATCH (p:person) RETURN collect(p.fName) +---- 1 +[Alice,Bob,Carol,Dan,Elizabeth,Farooq,Greg,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff] + +-NAME SimpleCollectLISTOfINT64Test +-QUERY MATCH (p:person) RETURN collect(p.workedHours) +---- 1 +[[10,5],[12,8],[4,5],[1,9],[2],[3,4,5,6,7],[1],[10,11,12,3,4,5,6,7]] + +-NAME SimpleCollectLISTOfSTRINGTest +-QUERY MATCH (p:person) RETURN collect(p.usedNames) +---- 1 +[[Aida],[Bobby],[Carmen,Fred],[Wolfeschlegelstein,Daniel],[Ein],[Fesdwe],[Grad],[Ad,De,Hi,Kye,Orlan]] + +-NAME SimpleCollectLISTOfLISTOfINT64Test +-QUERY MATCH (p:person) RETURN collect(p.courseScoresPerTerm) +---- 1 +[[[10,8],[6,7,8]],[[8,9],[9,10]],[[8,10]],[[7,4],[8,8],[9]],[[6],[7],[8]],[[8]],[[10]],[[7],[10],[6,7]]] + +-NAME SimpleCollectLISTOfINT64MultiThreadTest +-QUERY MATCH (p:person) RETURN collect(p.workedHours) +-PARALLELISM 8 +---- 1 +[[10,5],[12,8],[4,5],[1,9],[2],[3,4,5,6,7],[1],[10,11,12,3,4,5,6,7]]