diff --git a/src/function/built_in_vector_operations.cpp b/src/function/built_in_vector_operations.cpp index c93628bb639..ce2c3176650 100644 --- a/src/function/built_in_vector_operations.cpp +++ b/src/function/built_in_vector_operations.cpp @@ -442,7 +442,14 @@ void BuiltInVectorOperations::registerListOperations() { vectorOperations.insert({LIST_SLICE_FUNC_NAME, ListSliceVectorOperation::getDefinitions()}); vectorOperations.insert({ARRAY_SLICE_FUNC_NAME, ListSliceVectorOperation::getDefinitions()}); vectorOperations.insert({LIST_SORT_FUNC_NAME, ListSortVectorOperation::getDefinitions()}); + vectorOperations.insert( + {LIST_REVERSE_SORT_FUNC_NAME, ListReverseSortVectorOperation::getDefinitions()}); vectorOperations.insert({LIST_SUM_FUNC_NAME, ListSumVectorOperation::getDefinitions()}); + vectorOperations.insert( + {LIST_DISTINCT_FUNC_NAME, ListDistinctVectorOperation::getDefinitions()}); + vectorOperations.insert({LIST_UNIQUE_FUNC_NAME, ListUniqueVectorOperation::getDefinitions()}); + vectorOperations.insert( + {LIST_ANY_VALUE_FUNC_NAME, ListAnyValueVectorOperation::getDefinitions()}); } void BuiltInVectorOperations::registerInternalIDOperation() { diff --git a/src/function/vector_list_operation.cpp b/src/function/vector_list_operation.cpp index c82fa5b5110..51e4f452f39 100644 --- a/src/function/vector_list_operation.cpp +++ b/src/function/vector_list_operation.cpp @@ -1,16 +1,20 @@ #include "binder/expression_binder.h" #include "common/types/ku_list.h" #include "common/vector/value_vector_utils.h" +#include "function/list/operations/list_any_value_operation.h" #include "function/list/operations/list_append_operation.h" #include "function/list/operations/list_concat_operation.h" #include "function/list/operations/list_contains.h" +#include "function/list/operations/list_distinct_operation.h" #include "function/list/operations/list_extract_operation.h" #include "function/list/operations/list_len_operation.h" #include 
"function/list/operations/list_position_operation.h" #include "function/list/operations/list_prepend_operation.h" +#include "function/list/operations/list_reverse_sort_operation.h" #include "function/list/operations/list_slice_operation.h" #include "function/list/operations/list_sort_operation.h" #include "function/list/operations/list_sum_operation.h" +#include "function/list/operations/list_unique_operation.h" #include "function/list/vector_list_operations.h" using namespace kuzu::common; @@ -340,9 +344,18 @@ std::unique_ptr ListSortVectorOperation::bindFunc( const binder::expression_vector& arguments, FunctionDefinition* definition) { auto vectorOperationDefinition = reinterpret_cast(definition); switch (arguments[0]->dataType.getChildType()->getTypeID()) { + case INT16: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case INT32: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; case INT64: { vectorOperationDefinition->execFunc = getExecFunction(arguments); } break; + case FLOAT: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; case DOUBLE: { vectorOperationDefinition->execFunc = getExecFunction(arguments); } break; @@ -384,6 +397,72 @@ scalar_exec_func ListSortVectorOperation::getExecFunction( } } +std::vector> +ListReverseSortVectorOperation::getDefinitions() { + std::vector> result; + result.push_back(std::make_unique(LIST_REVERSE_SORT_FUNC_NAME, + std::vector{VAR_LIST}, VAR_LIST, nullptr, nullptr, bindFunc, + false /* isVarlength*/)); + result.push_back(std::make_unique(LIST_REVERSE_SORT_FUNC_NAME, + std::vector{VAR_LIST, STRING}, VAR_LIST, nullptr, nullptr, bindFunc, + false /* isVarlength*/)); + return result; +} + +std::unique_ptr ListReverseSortVectorOperation::bindFunc( + const binder::expression_vector& arguments, FunctionDefinition* definition) { + auto vectorOperationDefinition = reinterpret_cast(definition); + switch 
(arguments[0]->dataType.getChildType()->getTypeID()) { + case INT16: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case INT32: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case INT64: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case FLOAT: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case DOUBLE: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case BOOL: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case STRING: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case DATE: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case TIMESTAMP: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + case INTERVAL: { + vectorOperationDefinition->execFunc = getExecFunction(arguments); + } break; + default: { + throw common::NotImplementedException("ListReverseSortVectorOperation::bindFunc"); + } + } + return std::make_unique(arguments[0]->getDataType()); +} + +template +scalar_exec_func ListReverseSortVectorOperation::getExecFunction( + const binder::expression_vector& arguments) { + if (arguments.size() == 1) { + return UnaryListExecFunction>; + } else if (arguments.size() == 2) { + return BinaryListExecFunction>; + } else { + throw common::RuntimeException("Invalid number of arguments"); + } +} + std::vector> ListSumVectorOperation::getDefinitions() { std::vector> result; result.push_back(std::make_unique(LIST_SUM_FUNC_NAME, @@ -412,5 +491,190 @@ std::unique_ptr ListSumVectorOperation::bindFunc( return std::make_unique(resultType); } +std::vector> +ListDistinctVectorOperation::getDefinitions() { + std::vector> result; + result.push_back(std::make_unique(LIST_DISTINCT_FUNC_NAME, + std::vector{VAR_LIST}, VAR_LIST, nullptr, nullptr, bindFunc, + false /* 
isVarlength*/)); + return result; +} + +std::unique_ptr ListDistinctVectorOperation::bindFunc( + const binder::expression_vector& arguments, FunctionDefinition* definition) { + auto vectorOperationDefinition = reinterpret_cast(definition); + switch (arguments[0]->dataType.getChildType()->getTypeID()) { + case INT16: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case INT32: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case INT64: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case FLOAT: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case DOUBLE: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case BOOL: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case STRING: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case DATE: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case TIMESTAMP: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case INTERVAL: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + default: { + throw common::NotImplementedException("ListDistinctVectorOperation::bindFunc"); + } + } + return std::make_unique(arguments[0]->getDataType()); +} + +std::vector> +ListUniqueVectorOperation::getDefinitions() { + std::vector> result; + result.push_back(std::make_unique(LIST_UNIQUE_FUNC_NAME, + std::vector{VAR_LIST}, INT64, nullptr, nullptr, bindFunc, + false /* isVarlength*/)); + return result; +} + +std::unique_ptr ListUniqueVectorOperation::bindFunc( + const binder::expression_vector& arguments, FunctionDefinition* definition) { + auto vectorOperationDefinition = reinterpret_cast(definition); + switch (arguments[0]->dataType.getChildType()->getTypeID()) { + case INT16: { + vectorOperationDefinition->execFunc = + 
UnaryListExecFunction>; + } break; + case INT32: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case INT64: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case FLOAT: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case DOUBLE: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case BOOL: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case STRING: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case DATE: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case TIMESTAMP: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + case INTERVAL: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction>; + } break; + default: { + throw common::NotImplementedException("ListUniqueVectorOperation::bindFunc"); + } + } + return std::make_unique(DataType(INT64)); +} + +std::vector> +ListAnyValueVectorOperation::getDefinitions() { + std::vector> result; + result.push_back(std::make_unique(LIST_ANY_VALUE_FUNC_NAME, + std::vector{VAR_LIST}, ANY, nullptr, nullptr, bindFunc, + false /* isVarlength*/)); + return result; +} + +std::unique_ptr ListAnyValueVectorOperation::bindFunc( + const binder::expression_vector& arguments, FunctionDefinition* definition) { + auto vectorOperationDefinition = reinterpret_cast(definition); + auto resultType = *arguments[0]->getDataType().getChildType(); + switch (resultType.typeID) { + case INT16: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case INT32: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case INT64: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case FLOAT: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case DOUBLE: { + 
vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case BOOL: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case STRING: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case DATE: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case TIMESTAMP: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case INTERVAL: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + case VAR_LIST: { + vectorOperationDefinition->execFunc = + UnaryListExecFunction; + } break; + default: { + throw common::NotImplementedException("ListAnyValueVectorOperation::bindFunc"); + } + } + return std::make_unique(resultType); +} + } // namespace function } // namespace kuzu diff --git a/src/include/common/expression_type.h b/src/include/common/expression_type.h index 23237f13d38..9380cceb3bd 100644 --- a/src/include/common/expression_type.h +++ b/src/include/common/expression_type.h @@ -57,6 +57,10 @@ const std::string LIST_SLICE_FUNC_NAME = "LIST_SLICE"; const std::string ARRAY_SLICE_FUNC_NAME = "ARRAY_SLICE"; const std::string LIST_SUM_FUNC_NAME = "LIST_SUM"; const std::string LIST_SORT_FUNC_NAME = "LIST_SORT"; +const std::string LIST_REVERSE_SORT_FUNC_NAME = "LIST_REVERSE_SORT"; +const std::string LIST_DISTINCT_FUNC_NAME = "LIST_DISTINCT"; +const std::string LIST_UNIQUE_FUNC_NAME = "LIST_UNIQUE"; +const std::string LIST_ANY_VALUE_FUNC_NAME = "LIST_ANY_VALUE"; // struct const std::string STRUCT_PACK_FUNC_NAME = "STRUCT_PACK"; diff --git a/src/include/function/list/operations/base_list_sort_operation.h b/src/include/function/list/operations/base_list_sort_operation.h new file mode 100644 index 00000000000..c56a08cdf28 --- /dev/null +++ b/src/include/function/list/operations/base_list_sort_operation.h @@ -0,0 +1,106 @@ +#pragma once + +#include "common/vector/value_vector.h" + +namespace kuzu { +namespace function { 
+namespace operation { + +struct BaseListSortOperation { +public: + static inline bool isAscOrder(const std::string& sortOrder) { + if (sortOrder == "ASC") { + return true; + } else if (sortOrder == "DESC") { + return false; + } else { + throw common::RuntimeException("Invalid sortOrder"); + } + } + + static inline bool isNullFirst(const std::string& nullOrder) { + if (nullOrder == "NULLS FIRST") { + return true; + } else if (nullOrder == "NULLS LAST") { + return false; + } else { + throw common::RuntimeException("Invalid nullOrder"); + } + } + + template + static void sortValues(common::list_entry_t& input, common::list_entry_t& result, + common::ValueVector& inputVector, common::ValueVector& resultVector, bool ascOrder, + bool nullFirst) { + // TODO(Ziyi) - Replace this sort implementation with radix_sort implementation: + // https://github.com/kuzudb/kuzu/issues/1536. + auto inputValues = common::ListVector::getListValues(&inputVector, input); + auto inputDataVector = common::ListVector::getDataVector(&inputVector); + + // Calculate null count. + auto nullCount = 0; + for (auto i = 0; i < input.size; i++) { + if (inputDataVector->isNull(input.offset + i)) { + nullCount += 1; + } + } + + result = common::ListVector::addList(&resultVector, input.size); + auto resultValues = common::ListVector::getListValues(&resultVector, result); + auto resultDataVector = common::ListVector::getDataVector(&resultVector); + auto numBytesPerValue = resultDataVector->getNumBytesPerValue(); + + // Add nulls first. + if (nullFirst) { + setVectorRangeToNull(*resultDataVector, result.offset, 0, nullCount); + resultValues += numBytesPerValue * nullCount; + } + + // Add actual data. 
+ for (auto i = 0; i < input.size; i++) { + if (inputDataVector->isNull(input.offset + i)) { + inputValues += numBytesPerValue; + continue; + } + common::ValueVectorUtils::copyValue( + resultValues, *resultDataVector, inputValues, *inputDataVector); + resultValues += numBytesPerValue; + inputValues += numBytesPerValue; + } + + // Add nulls in the end. + if (!nullFirst) { + setVectorRangeToNull( + *resultDataVector, result.offset, input.size - nullCount, input.size); + resultValues += numBytesPerValue * nullCount; + } + + // Determine the starting and ending position of the data to be sorted. + auto sortStart = nullCount; + auto sortEnd = input.size; + if (!nullFirst) { + sortStart = 0; + sortEnd = input.size - nullCount; + } + + // Sort the data based on order. + auto sortingValues = + reinterpret_cast(common::ListVector::getListValues(&resultVector, result)); + if (ascOrder) { + std::sort(sortingValues + sortStart, sortingValues + sortEnd, std::less{}); + } else { + std::sort(sortingValues + sortStart, sortingValues + sortEnd, std::greater{}); + } + } + + static void setVectorRangeToNull( + common::ValueVector& vector, uint64_t offset, uint64_t startPos, uint64_t endPos) { + for (auto i = startPos; i < endPos; i++) { + vector.setNull(offset + i, true); + } + } +}; + +} // namespace operation +} // namespace function +} // namespace kuzu diff --git a/src/include/function/list/operations/list_any_value_operation.h b/src/include/function/list/operations/list_any_value_operation.h new file mode 100644 index 00000000000..e99302b025c --- /dev/null +++ b/src/include/function/list/operations/list_any_value_operation.h @@ -0,0 +1,30 @@ +#pragma once + +#include "common/vector/value_vector_utils.h" + +namespace kuzu { +namespace function { +namespace operation { + +struct ListAnyValue { + template + static inline void operation(common::list_entry_t& input, T& result, + common::ValueVector& inputVector, common::ValueVector& resultVector) { + auto inputValues = 
common::ListVector::getListValues(&inputVector, input); + auto inputDataVector = common::ListVector::getDataVector(&inputVector); + auto numBytesPerValue = inputDataVector->getNumBytesPerValue(); + + for (auto i = 0; i < input.size; i++) { + if (!(inputDataVector->isNull(input.offset + i))) { + common::ValueVectorUtils::copyValue( + (uint8_t*)(&result), resultVector, inputValues, *inputDataVector); + break; + } + inputValues += numBytesPerValue; + } + } +}; + +} // namespace operation +} // namespace function +} // namespace kuzu diff --git a/src/include/function/list/operations/list_distinct_operation.h b/src/include/function/list/operations/list_distinct_operation.h new file mode 100644 index 00000000000..b81308db3fc --- /dev/null +++ b/src/include/function/list/operations/list_distinct_operation.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +#include "common/vector/value_vector_utils.h" + +namespace kuzu { +namespace function { +namespace operation { + +template +struct ListDistinct { + static inline void operation(common::list_entry_t& input, common::list_entry_t& result, + common::ValueVector& inputVector, common::ValueVector& resultVector) { + std::set uniqueValues; + auto inputValues = + reinterpret_cast(common::ListVector::getListValues(&inputVector, input)); + auto inputDataVector = common::ListVector::getDataVector(&inputVector); + + for (auto i = 0; i < input.size; i++) { + if (inputDataVector->isNull(input.offset + i)) { + continue; + } + uniqueValues.insert(inputValues[i]); + } + + result = common::ListVector::addList(&resultVector, uniqueValues.size()); + auto resultValues = common::ListVector::getListValues(&resultVector, result); + auto resultDataVector = common::ListVector::getDataVector(&resultVector); + auto numBytesPerValue = inputDataVector->getNumBytesPerValue(); + for (auto val : uniqueValues) { + common::ValueVectorUtils::copyValue(resultValues, *resultDataVector, + reinterpret_cast(&val), *inputDataVector); + resultValues += 
numBytesPerValue; + } + } +}; + +} // namespace operation +} // namespace function +} // namespace kuzu diff --git a/src/include/function/list/operations/list_reverse_sort_operation.h b/src/include/function/list/operations/list_reverse_sort_operation.h new file mode 100644 index 00000000000..f3abf7459d0 --- /dev/null +++ b/src/include/function/list/operations/list_reverse_sort_operation.h @@ -0,0 +1,28 @@ +#pragma once + +#include "base_list_sort_operation.h" +#include "common/vector/value_vector.h" + +namespace kuzu { +namespace function { +namespace operation { + +template +struct ListReverseSort : BaseListSortOperation { + static inline void operation(common::list_entry_t& input, common::list_entry_t& result, + common::ValueVector& inputVector, common::ValueVector& resultVector) { + sortValues( + input, result, inputVector, resultVector, false /* ascOrder */, true /* nullFirst */); + } + + static inline void operation(common::list_entry_t& input, common::ku_string_t& nullOrder, + common::list_entry_t& result, common::ValueVector& inputVector, + common::ValueVector& valueVector, common::ValueVector& resultVector) { + sortValues(input, result, inputVector, resultVector, false /* ascOrder */, + isNullFirst(nullOrder.getAsString()) /* nullFirst */); + } +}; + +} // namespace operation +} // namespace function +} // namespace kuzu diff --git a/src/include/function/list/operations/list_sort_operation.h b/src/include/function/list/operations/list_sort_operation.h index dc64d644359..9a8b58fad6b 100644 --- a/src/include/function/list/operations/list_sort_operation.h +++ b/src/include/function/list/operations/list_sort_operation.h @@ -1,5 +1,6 @@ #pragma once +#include "base_list_sort_operation.h" #include "common/vector/value_vector.h" namespace kuzu { @@ -7,117 +8,26 @@ namespace function { namespace operation { template -struct ListSort { +struct ListSort : BaseListSortOperation { static inline void operation(common::list_entry_t& input, common::list_entry_t& result, 
common::ValueVector& inputVector, common::ValueVector& resultVector) { - sortValues( + sortValues( input, result, inputVector, resultVector, true /* ascOrder */, true /* nullFirst */); } static inline void operation(common::list_entry_t& input, common::ku_string_t& sortOrder, common::list_entry_t& result, common::ValueVector& inputVector, common::ValueVector& valueVector, common::ValueVector& resultVector) { - sortValues(input, result, inputVector, resultVector, isAscOrder(sortOrder.getAsString()), + sortValues(input, result, inputVector, resultVector, isAscOrder(sortOrder.getAsString()), true /* nullFirst */); } static inline void operation(common::list_entry_t& input, common::ku_string_t& sortOrder, common::ku_string_t& nullOrder, common::list_entry_t& result, common::ValueVector& inputVector, common::ValueVector& resultVector) { - sortValues(input, result, inputVector, resultVector, isAscOrder(sortOrder.getAsString()), + sortValues(input, result, inputVector, resultVector, isAscOrder(sortOrder.getAsString()), isNullFirst(nullOrder.getAsString())); } - - static inline bool isAscOrder(const std::string& sortOrder) { - if (sortOrder == "ASC") { - return true; - } else if (sortOrder == "DESC") { - return false; - } else { - throw common::RuntimeException("Invalid sortOrder"); - } - } - - static inline bool isNullFirst(const std::string& nullOrder) { - if (nullOrder == "NULLS FIRST") { - return true; - } else if (nullOrder == "NULLS LAST") { - return false; - } else { - throw common::RuntimeException("Invalid nullOrder"); - } - } - - static void sortValues(common::list_entry_t& input, common::list_entry_t& result, - common::ValueVector& inputVector, common::ValueVector& resultVector, bool ascOrder, - bool nullFirst) { - // TODO(Ziyi) - Replace this sort implementation with radix_sort implementation: - // https://github.com/kuzudb/kuzu/issues/1536. 
- auto inputValues = common::ListVector::getListValues(&inputVector, input); - auto inputDataVector = common::ListVector::getDataVector(&inputVector); - - // Calculate null count. - auto nullCount = 0; - for (auto i = 0; i < input.size; i++) { - if (inputDataVector->isNull(input.offset + i)) { - nullCount += 1; - } - } - - result = common::ListVector::addList(&resultVector, input.size); - auto resultValues = common::ListVector::getListValues(&resultVector, result); - auto resultDataVector = common::ListVector::getDataVector(&resultVector); - auto numBytesPerValue = resultDataVector->getNumBytesPerValue(); - - // Add nulls first. - if (nullFirst) { - setVectorRangeToNull(*resultDataVector, result.offset, 0, nullCount); - resultValues += numBytesPerValue * nullCount; - } - - // Add actual data. - for (auto i = 0; i < input.size; i++) { - if (inputDataVector->isNull(input.offset + i)) { - inputValues += numBytesPerValue; - continue; - } - common::ValueVectorUtils::copyValue( - resultValues, *resultDataVector, inputValues, *inputDataVector); - resultValues += numBytesPerValue; - inputValues += numBytesPerValue; - } - - // Add nulls in the end. - if (!nullFirst) { - setVectorRangeToNull( - *resultDataVector, result.offset, input.size - nullCount, input.size); - resultValues += numBytesPerValue * nullCount; - } - - // Determine the starting and ending position of the data to be sorted. - auto sortStart = nullCount; - auto sortEnd = input.size; - if (!nullFirst) { - sortStart = 0; - sortEnd = input.size - nullCount; - } - - // Sort the data based on order. 
- auto sortingValues = - reinterpret_cast(common::ListVector::getListValues(&resultVector, result)); - if (ascOrder) { - std::sort(sortingValues + sortStart, sortingValues + sortEnd, std::less{}); - } else { - std::sort(sortingValues + sortStart, sortingValues + sortEnd, std::greater{}); - } - } - - static void setVectorRangeToNull( - common::ValueVector& vector, uint64_t offset, uint64_t startPos, uint64_t endPos) { - for (auto i = startPos; i < endPos; i++) { - vector.setNull(offset + i, true); - } - } }; } // namespace operation diff --git a/src/include/function/list/operations/list_unique_operation.h b/src/include/function/list/operations/list_unique_operation.h new file mode 100644 index 00000000000..3cd7d02886d --- /dev/null +++ b/src/include/function/list/operations/list_unique_operation.h @@ -0,0 +1,32 @@ +#pragma once + +#include + +#include "common/vector/value_vector_utils.h" + +namespace kuzu { +namespace function { +namespace operation { + +template +struct ListUnique { + static inline void operation(common::list_entry_t& input, int64_t& result, + common::ValueVector& inputVector, common::ValueVector& resultVector) { + std::set uniqueValues; + auto inputValues = + reinterpret_cast(common::ListVector::getListValues(&inputVector, input)); + auto inputDataVector = common::ListVector::getDataVector(&inputVector); + + for (auto i = 0; i < input.size; i++) { + if (inputDataVector->isNull(input.offset + i)) { + continue; + } + uniqueValues.insert(inputValues[i]); + } + result = uniqueValues.size(); + } +}; + +} // namespace operation +} // namespace function +} // namespace kuzu diff --git a/src/include/function/list/vector_list_operations.h b/src/include/function/list/vector_list_operations.h index 362bc337e57..f6fddca4799 100644 --- a/src/include/function/list/vector_list_operations.h +++ b/src/include/function/list/vector_list_operations.h @@ -144,11 +144,37 @@ struct ListSortVectorOperation : public VectorListOperations { static scalar_exec_func 
getExecFunction(const binder::expression_vector& arguments); }; +struct ListReverseSortVectorOperation : public VectorListOperations { + static std::vector> getDefinitions(); + static std::unique_ptr bindFunc( + const binder::expression_vector& arguments, FunctionDefinition* definition); + template + static scalar_exec_func getExecFunction(const binder::expression_vector& arguments); +}; + struct ListSumVectorOperation : public VectorListOperations { static std::vector> getDefinitions(); static std::unique_ptr bindFunc( const binder::expression_vector& arguments, FunctionDefinition* definition); }; +struct ListDistinctVectorOperation : public VectorListOperations { + static std::vector> getDefinitions(); + static std::unique_ptr bindFunc( + const binder::expression_vector& arguments, FunctionDefinition* definition); +}; + +struct ListUniqueVectorOperation : public VectorListOperations { + static std::vector> getDefinitions(); + static std::unique_ptr bindFunc( + const binder::expression_vector& arguments, FunctionDefinition* definition); +}; + +struct ListAnyValueVectorOperation : public VectorListOperations { + static std::vector> getDefinitions(); + static std::unique_ptr bindFunc( + const binder::expression_vector& arguments, FunctionDefinition* definition); +}; + } // namespace function } // namespace kuzu diff --git a/test/test_files/tinysnb/function/list.test b/test/test_files/tinysnb/function/list.test index ae70e169a25..30426e7bec7 100644 --- a/test/test_files/tinysnb/function/list.test +++ b/test/test_files/tinysnb/function/list.test @@ -728,21 +728,66 @@ True ---- 1 6.600000 --NAME ListSortIntAsc +-NAME ListSortInt16Asc +-QUERY Return list_sort([to_int16(2), to_int16(3), to_int16(1), NULL, NULL]); +---- 1 +[,,1,2,3] + +-NAME ListSortInt16Desc +-QUERY Return list_sort([to_int16(2), to_int16(3), to_int16(1), to_int16(5), NULL], 'DESC'); +---- 1 +[,5,3,2,1] + +-NAME ListSortInt16DescWithNullsLast +-QUERY Return list_sort([to_int16(2), to_int16(3), 
to_int16(1), NULL], 'DESC', 'NULLS LAST'); +---- 1 +[3,2,1,] + +-NAME ListSortInt32Asc +-QUERY Return list_sort([to_int32(2), to_int32(3), to_int32(1), NULL, NULL]); +---- 1 +[,,1,2,3] + +-NAME ListSortInt32Desc +-QUERY Return list_sort([to_int32(2), to_int32(3), to_int32(1), to_int32(5), NULL], 'DESC'); +---- 1 +[,5,3,2,1] + +-NAME ListSortInt32DescWithNullsLast +-QUERY Return list_sort([to_int32(2), to_int32(3), to_int32(1), NULL], 'DESC', 'NULLS LAST'); +---- 1 +[3,2,1,] + +-NAME ListSortInt64Asc -QUERY Return list_sort([2, 3, 1, NULL, NULL]); ---- 1 [,,1,2,3] --NAME ListSortIntDesc +-NAME ListSortInt64Desc -QUERY Return list_sort([2, 3, 1, 5, NULL], 'DESC'); ---- 1 [,5,3,2,1] --NAME ListSortIntDescWithNullsLast +-NAME ListSortInt64DescWithNullsLast -QUERY Return list_sort([2, 3, 1, NULL], 'DESC', 'NULLS LAST'); ---- 1 [3,2,1,] +-NAME ListSortFloatAsc +-QUERY Return list_sort([to_float(1.1), to_float(2.3), to_float(4.5), NULL]); +---- 1 +[,1.100000,2.300000,4.500000] + +-NAME ListSortFloatDesc +-QUERY Return list_sort([to_float(2.1), to_float(3.2), to_float(1.33), to_float(5.44), NULL], 'DESC'); +---- 1 +[,5.440000,3.200000,2.100000,1.330000] + +-NAME ListSortFloatDescWithNullsLast +-QUERY Return list_sort([to_float(2.1), to_float(3.2), to_float(1.11), NULL], 'DESC', 'NULLS LAST'); +---- 1 +[3.200000,2.100000,1.110000,] + -NAME ListSortStringDesc -QUERY Return list_sort(['sss', 'sssss', 'abs', NULL], 'DESC'); ---- 1 @@ -827,3 +872,306 @@ True -QUERY Return list_sum([1.1, 2.2, 3.3, NULL]); ---- 1 6.600000 + +-NAME ListReverseSortInt16 +-QUERY Return list_reverse_sort([to_int16(1), to_int16(1), NULL, to_int16(-3), to_int16(1), to_int16(5)]); +---- 1 +[,5,1,1,1,-3] + +-NAME ListReverseSortInt16WithNullsLast +-QUERY Return list_reverse_sort([to_int16(1), to_int16(1), NULL, to_int16(-3), to_int16(1), to_int16(5)], 'NULLS LAST'); +---- 1 +[5,1,1,1,-3,] + +-NAME ListReverseSortInt32 +-QUERY Return list_reverse_sort([to_int32(1), to_int32(1), NULL, to_int32(-3), 
to_int32(1), to_int32(5)]); +---- 1 +[,5,1,1,1,-3] + +-NAME ListReverseSortInt32WithNullsLast +-QUERY Return list_reverse_sort([to_int32(1), to_int32(1), NULL, to_int32(-3), to_int32(1), to_int32(5)], 'NULLS LAST'); +---- 1 +[5,1,1,1,-3,] + +-NAME ListReverseSortInt64 +-QUERY Return list_reverse_sort([2, 3, 1, 5, NULL]); +---- 1 +[,5,3,2,1] + +-NAME ListReverseSortInt64WithNullsLast +-QUERY Return list_reverse_sort([2, 3, 1, 5, NULL], 'NULLS LAST'); +---- 1 +[5,3,2,1,] + +-NAME ListReverseSortFloat +-QUERY Return list_reverse_sort([to_float(1.1), to_float(1.1), NULL, to_float(3.4), to_float(1.2), to_float(5.1)]); +---- 1 +[,5.100000,3.400000,1.200000,1.100000,1.100000] + +-NAME ListReverseSortFloatWithNullsLast +-QUERY Return list_reverse_sort([to_float(1.1), to_float(1.1), NULL, to_float(3.4), to_float(1.2), to_float(5.1)], 'NULLS LAST'); +---- 1 +[5.100000,3.400000,1.200000,1.100000,1.100000,] + +-NAME ListReverseSortDouble +-QUERY Return list_reverse_sort([1.1, 2.3, 4.5, NULL]); +---- 1 +[,4.500000,2.300000,1.100000] + +-NAME ListReverseSortDoubleWithNullsLast +-QUERY Return list_reverse_sort([1.1, 2.3, 4.5, NULL], 'NULLS LAST'); +---- 1 +[4.500000,2.300000,1.100000,] + +-NAME ListReverseSortStringSeq1 +-QUERY Return list_reverse_sort(['this is a long string', 'this is yet another long string', 'abs', NULL]); +---- 1 +[,this is yet another long string,this is a long string,abs] + +-NAME ListReverseSortWithStringSeq2 +-QUERY MATCH (a:person) RETURN list_reverse_sort([NULL, a.usedNames[1], 'Some val', NULL]) +---- 8 +[,,Some val,Ad] +[,,Some val,Aida] +[,,Some val,Bobby] +[,,Some val,Carmen] +[,,Some val,Ein] +[,,Some val,Fesdwe] +[,,Some val,Grad] +[,,Wolfeschlegelstein,Some val] + +-NAME ListReverseSortStringWithNullsLast +-QUERY Return list_reverse_sort(['sss', 'sssss', 'abs', NULL], 'NULLS LAST'); +---- 1 +[sssss,sss,abs,] + +-NAME ListReverseSortDateSeq1 +-QUERY Return list_reverse_sort([date('1992-05-03'), date('1993-05-03'), date('1994-05-03'), NULL]); +---- 1
+[,1994-05-03,1993-05-03,1992-05-03] + +-NAME ListReverseSortDateSeq2 +-QUERY Return list_reverse_sort([date('1992-05-03'), date('1993-05-03'), date('1994-05-03'), NULL], 'NULLS LAST'); +---- 1 +[1994-05-03,1993-05-03,1992-05-03,] + +-NAME ListReverseSortTimestampSeq1 +-QUERY Return list_reverse_sort([timestamp('1992-05-03 11:13:25'), timestamp('1993-05-03 11:13:25'), timestamp('1994-05-03 11:13:25'), NULL]); +---- 1 +[,1994-05-03 11:13:25,1993-05-03 11:13:25,1992-05-03 11:13:25] + +-NAME ListReverseSortTimestampSeq2 +-QUERY Return list_reverse_sort([timestamp('1992-05-03 11:13:25'), timestamp('1993-05-03 11:13:25'), timestamp('1994-05-03 11:13:25'), NULL], 'NULLS LAST'); +---- 1 +[1994-05-03 11:13:25,1993-05-03 11:13:25,1992-05-03 11:13:25,] + +-NAME ListReverseSortIntervalSeq1 +-QUERY Return list_reverse_sort([interval('2 hours 3 days 20 minutes'), interval('3 hours 3 days 20 minutes'), interval('4 hours 3 days 20 minutes'), NULL]); +---- 1 +[,3 days 04:20:00,3 days 03:20:00,3 days 02:20:00] + +-NAME ListReverseSortIntervalSeq2 +-QUERY Return list_reverse_sort([interval('2 hours 3 days 20 minutes'), interval('3 hours 3 days 20 minutes'), interval('4 hours 3 days 20 minutes'), NULL], 'NULLS LAST'); +---- 1 +[3 days 04:20:00,3 days 03:20:00,3 days 02:20:00,] + +-NAME ListReverseSortBooleanSeq1 +-QUERY Return list_reverse_sort([true, true, false, NULL]); +---- 1 +[,True,True,False] + +-NAME ListReverseSortBooleanSeq2 +-QUERY Return list_reverse_sort([true, true, false, NULL], 'NULLS LAST'); +---- 1 +[True,True,False,] + +-NAME ListDistinctWithInt16 +-QUERY Return list_distinct([to_int16(1), to_int16(1), NULL, to_int16(-3), to_int16(1), to_int16(5)]); +---- 1 +[-3,1,5] + +-NAME ListDistinctWithInt32 +-QUERY Return list_distinct([to_int32(1), to_int32(1), NULL, to_int32(-3), to_int32(1), to_int32(5)]); +---- 1 +[-3,1,5] + +-NAME ListDistinctWithInt64 +-QUERY Return list_distinct([1, 1, NULL, -3, 1, 5]); +---- 1 +[-3,1,5] + +-NAME ListDistinctWithFloat +-QUERY Return 
list_distinct([to_float(1.1), to_float(1.1), NULL, to_float(3.4), to_float(1.2), to_float(5.1)]); +---- 1 +[1.100000,1.200000,3.400000,5.100000] + +-NAME ListDistinctWithDouble +-QUERY Return list_distinct([1.1, 1.1, NULL, -3.4, 1.2, 5.1]); +---- 1 +[-3.400000,1.100000,1.200000,5.100000] + +-NAME ListDistinctBoolean +-QUERY Return list_distinct([true, true, NULL, false, false]); +---- 1 +[False,True] + +-NAME ListDistinctWithStringSeq1 +-QUERY Return list_distinct(['this is a long string', 'this is a long string', NULL, 'bbb', 'ccc', 'ccc']); +---- 1 +[bbb,ccc,this is a long string] + +-NAME ListDistinctWithStringSeq2 +-QUERY MATCH (a:person) RETURN list_distinct([NULL, a.usedNames[1], a.usedNames[1], NULL]) +---- 8 +[Ad] +[Aida] +[Bobby] +[Carmen] +[Ein] +[Fesdwe] +[Grad] +[Wolfeschlegelstein] + +-NAME ListDistinctWithDate +-QUERY Return list_distinct([date('1992-05-03'), date('1993-05-03'), date('1993-05-03'), date('1994-05-03'), date('1993-05-03'), NULL]); +---- 1 +[1992-05-03,1993-05-03,1994-05-03] + +-NAME ListDistinctWithTimestamp +-QUERY Return list_distinct([timestamp('1992-05-03 11:13:25'), timestamp('1994-05-03 11:13:25'), timestamp('1993-05-03 11:13:25'), timestamp('1994-05-03 11:13:25'), NULL]); +---- 1 +[1992-05-03 11:13:25,1993-05-03 11:13:25,1994-05-03 11:13:25] + +-NAME ListDistinctWithInterval +-QUERY Return list_distinct([interval('2 hours 3 days 20 minutes'), interval('2 hours 3 days 20 minutes'), interval('3 hours 3 days 20 minutes'), interval('4 hours 3 days 20 minutes'), NULL]); +---- 1 +[3 days 02:20:00,3 days 03:20:00,3 days 04:20:00] + +-NAME ListUniqueWithInt16 +-QUERY MATCH (m:movies) WHERE m.name='Roma' RETURN list_unique([NULL, to_int16(m.length), to_int16(m.length), to_int16(1)]) +---- 1 +2 + +-NAME ListUniqueWithInt32 +-QUERY MATCH (m:movies) WHERE m.name='Roma' RETURN list_unique([NULL, to_int32(m.length), to_int32(m.length), to_int32(3)]) +---- 1 +2 + +-NAME ListUniqueWithInt64 +-QUERY Return list_unique([1, 1, NULL, -3, 1, 5]); 
+---- 1 +3 + +-NAME ListUniqueWithFloat +-QUERY MATCH (p:person) WHERE p.ID=0 RETURN list_unique([NULL, to_float(p.gender), to_float(p.gender), to_float(4.5)]) +---- 1 +2 + +-NAME ListUniqueWithDouble +-QUERY Return list_unique([1.1, 1.1, NULL, -3.4, 1.2, 5.1]); +---- 1 +4 + +-NAME ListUniqueBoolean +-QUERY Return list_unique([true, true, NULL, false, false]); +---- 1 +2 + +-NAME ListUniqueWithStringSeq1 +-QUERY Return list_unique(['this is long string', 'this is long string', NULL, 'bbb', 'ccc', 'aaa']); +---- 1 +4 + +-NAME ListUniqueWithStringSeq2 +-QUERY MATCH (a:person) RETURN list_unique([NULL, a.usedNames[1], 'sss', NULL]) +---- 8 +2 +2 +2 +2 +2 +2 +2 +2 + +-NAME ListUniqueWithDate +-QUERY Return list_unique([date('1992-05-03'), date('1993-05-03'), date('1993-05-03'), date('1994-05-03'), date('1993-05-03'), NULL]); +---- 1 +3 + +-NAME ListUniqueWithTimestamp +-QUERY Return list_unique([timestamp('1992-05-03 11:13:25'), timestamp('1994-05-03 11:13:25'), timestamp('1993-05-03 11:13:25'), timestamp('1994-05-03 11:13:25'), NULL]); +---- 1 +3 + +-NAME ListUniqueWithInterval +-QUERY Return list_unique([interval('2 hours 3 days 20 minutes'), interval('2 hours 3 days 20 minutes'), interval('3 hours 3 days 20 minutes'), interval('4 hours 3 days 20 minutes'), NULL]); +---- 1 +3 + +-NAME ListAnyValueWithInt16 +-QUERY MATCH (m:movies) WHERE m.name='Roma' RETURN list_any_value([NULL, to_int16(m.length), to_int16(m.length)]) +---- 1 +298 + +-NAME ListAnyValueWithInt32 +-QUERY MATCH (m:movies) WHERE m.name='Roma' RETURN list_any_value([NULL, to_int32(m.length), to_int32(m.length)]) +---- 1 +298 + +-NAME ListAnyValueWithInt64 +-QUERY Return list_any_value([NULL, 1, 1, NULL, -3, 1, 5]); +---- 1 +1 + +-NAME ListAnyValueWithFloat +-QUERY MATCH (p:person) WHERE p.ID=0 RETURN list_any_value([NULL, to_float(p.gender), to_float(p.gender), to_float(2.3)]) +---- 1 +1.000000 + +-NAME ListAnyValueWithDouble +-QUERY Return list_any_value([NULL, 1.1, 1.1, NULL, -3.4, 1.2, 5.1]); +---- 1 
+1.100000 + +-NAME ListAnyValueBoolean +-QUERY Return list_any_value([NULL, true, true, NULL, false]); +---- 1 +True + +-NAME ListAnyValueWithStringSeq1 +-QUERY Return list_any_value([NULL, NULL, 'this is long string', 'this is long string', NULL, 'this is long string', 'ccc', 'aaa']); +---- 1 +this is long string + +-NAME ListAnyValueWithStringSeq2 +-QUERY MATCH (a:person) RETURN list_any_value([NULL, a.usedNames[1], 'sss']) +---- 8 +Aida +Bobby +Carmen +Wolfeschlegelstein +Ein +Fesdwe +Grad +Ad + +-NAME ListAnyValueWithDate +-QUERY Return list_any_value([NULL, date('1992-05-03'), date('1993-05-03'), date('1993-05-03'), date('1994-05-03'), date('1993-05-03'), NULL]); +---- 1 +1992-05-03 + +-NAME ListAnyValueWithTimestamp +-QUERY Return list_any_value([NULL, timestamp('1992-05-03 11:13:25'), timestamp('1994-05-03 11:13:25'), timestamp('1993-05-03 11:13:25'), timestamp('1994-05-03 11:13:25'), NULL]); +---- 1 +1992-05-03 11:13:25 + +-NAME ListAnyValueWithInterval +-QUERY Return list_any_value([NULL, interval('2 hours 3 days 20 minutes'), interval('2 hours 3 days 20 minutes'), interval('3 hours 3 days 20 minutes'), interval('4 hours 3 days 20 minutes'), NULL]); +---- 1 +3 days 02:20:00 + +-NAME ListAnyValueWithList +-QUERY Return list_any_value([NULL, [1,2,3], [3,4,5]]); +---- 1 +[1,2,3]