From 4f27938329a1187126af6b775ea0af5b74b44fd0 Mon Sep 17 00:00:00 2001 From: AEsir777 Date: Fri, 10 Nov 2023 15:48:23 -0500 Subject: [PATCH] finish implemention of cast VarList to VarList --- .../function_evaluator.cpp | 6 +- src/function/vector_cast_functions.cpp | 80 +++++++++++++++---- src/include/common/vector/auxiliary_buffer.h | 1 + src/include/common/vector/value_vector.h | 5 ++ .../function/unary_function_executor.h | 7 +- .../tinysnb/cast/cast_to_nested_types.test | 22 +++++ test/test_files/tinysnb/function/cast.test | 21 ++--- test/test_files/tinysnb/function/play.test | 15 ++++ 8 files changed, 126 insertions(+), 31 deletions(-) create mode 100644 test/test_files/tinysnb/function/play.test diff --git a/src/expression_evaluator/function_evaluator.cpp b/src/expression_evaluator/function_evaluator.cpp index 033b58ab942..886e46d70c9 100644 --- a/src/expression_evaluator/function_evaluator.cpp +++ b/src/expression_evaluator/function_evaluator.cpp @@ -22,10 +22,8 @@ void FunctionExpressionEvaluator::evaluate() { child->evaluate(); } auto expr = reinterpret_cast(expression.get()); - if (expr->getFunctionName() == CAST_FUNC_NAME && - parameters[0]->dataType.getLogicalTypeID() == LogicalTypeID::STRING) { - execFunc(parameters, *resultVector, - reinterpret_cast(expr->getBindData())); + if (expr->getFunctionName() == CAST_FUNC_NAME) { + execFunc(parameters, *resultVector, expr->getBindData()); return; } if (execFunc != nullptr) { diff --git a/src/function/vector_cast_functions.cpp b/src/function/vector_cast_functions.cpp index 80a18fce607..5fcbf651a0b 100644 --- a/src/function/vector_cast_functions.cpp +++ b/src/function/vector_cast_functions.cpp @@ -34,9 +34,10 @@ static void castFixedListToString( resultVector.setValue(resultPos, result); } +template static void fixedListCastExecFunction(const std::vector>& params, ValueVector& result, void* /*dataPtr*/ = nullptr) { - assert(params.size() == 1); + KU_ASSERT(params.size() == 1); auto param = params[0]; if (param->state->isFlat()) { castFixedListToString(*param, param->state->selVector->selectedPositions[0], result, @@ -53,10 +54,25 @@ static void fixedListCastExecFunction(const std::vector +template<> +void fixedListCastExecFunction( + const std::vector>& params, ValueVector& result, + void* /*dataPtr*/) { + KU_ASSERT(params.size() == 1); + + auto inputVector = params[0].get(); + auto childNum = common::ListVector::getDataVectorSize(inputVector); + auto inputChildVector = (common::ListVector::getDataVector(inputVector)); + auto resultChildVector = (common::ListVector::getDataVector(&result)); + for (auto i = 0u; i < childNum; i++) { + castFixedListToString(*inputChildVector, i, *resultChildVector, i); + } +} + +template static void StringtoFixedListCastExecFunction( const std::vector>& params, ValueVector& result, void* dataPtr) { - assert(params.size() == 1); + KU_ASSERT(params.size() == 1); auto param = params[0]; auto csvReaderConfig = &reinterpret_cast(dataPtr)->csvConfig; if (param->state->isFlat()) { @@ -90,16 +106,33 @@ static void StringtoFixedListCastExecFunction( template<> void StringtoFixedListCastExecFunction( const std::vector>& params, ValueVector& result, void* dataPtr) { + KU_ASSERT(params.size() == 1 && + params[0]->dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST && + result.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST); + auto csvReaderConfig = &reinterpret_cast(dataPtr)->csvConfig; + auto inputVector = params[0].get(); + auto childNum = common::ListVector::getDataVectorSize(inputVector); + auto inputChildVector = (common::ListVector::getDataVector(inputVector)); + auto resultChildVector = (common::ListVector::getDataVector(&result)); + for (auto i = 0u; i < childNum; i++) { + resultChildVector->setNull(i, inputChildVector->isNull(i)); + if (!resultChildVector->isNull(i)) { + CastString::castToFixedList( + inputChildVector->getValue(i), resultChildVector, i, csvReaderConfig); + } + } } -template +template static void varListCastExecFunction(const std::vector>& params, ValueVector& result, void* dataPtr) { - assert(params.size() == 1); + KU_ASSERT(params.size() == 1); + result.resetAuxiliaryBuffer(); auto inputVector = params[0]; scalar_exec_func func = CastFunction::bindCastFunction("CAST", - inputVector->dataType.getLogicalTypeID(), result.dataType.getLogicalTypeID()) + VarListType::getChildType(&inputVector->dataType)->getLogicalTypeID(), + VarListType::getChildType(&result.dataType)->getLogicalTypeID()) ->execFunc; for (auto i = 0u; i < inputVector->state->selVector->selectedSize; i++) { auto pos = inputVector->state->selVector->selectedPositions[i]; @@ -116,10 +149,30 @@ static void varListCastExecFunction(const std::vector void varListCastExecFunction( const std::vector>& params, ValueVector& result, void* dataPtr) { - KU_ASSERT(params.size() == 1); - auto inputChildVector = params[0]; - - // TODO: Kebing finish this one + KU_ASSERT(params.size() == 1 && + params[0]->dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST && + result.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST); + result.resetAuxiliaryBuffer(); + + auto inputVector = params[0].get(); + auto childNum = common::ListVector::getDataVectorSize(inputVector); + auto inputChildVector = (common::ListVector::getSharedDataVector(inputVector)); + auto resultChildVector = (common::ListVector::getDataVector(&result)); + scalar_exec_func func = CastFunction::bindCastFunction("CAST", + VarListType::getChildType(&inputChildVector->dataType)->getLogicalTypeID(), + VarListType::getChildType(&resultChildVector->dataType)->getLogicalTypeID()) + ->execFunc; + for (auto i = 0u; i < childNum; i++) { + resultChildVector->setNull(i, inputChildVector->isNull(i)); + if (!resultChildVector->isNull(i)) { + // cast position i in child data vector + auto input_list_entry = inputChildVector->getValue(i); + auto result_list_entry = ListVector::addList(resultChildVector, input_list_entry.size); + resultChildVector->setValue(i, result_list_entry); + } + } + std::vector> childParams {inputChildVector}; + func(childParams, *resultChildVector, dataPtr); } bool CastFunction::hasImplicitCast(const LogicalType& srcType, const LogicalType& dstType) { @@ -402,7 +455,7 @@ static std::unique_ptr bindCastToStringFunction( EXECUTOR>; } break; case LogicalTypeID::FIXED_LIST: { - func = fixedListCastExecFunction; + func = fixedListCastExecFunction; } break; case LogicalTypeID::MAP: { func = @@ -810,10 +863,7 @@ std::unique_ptr CastAnyFunction::bindFunc( func->execFunc = CastFunction::bindCastFunction(func->name, inputTypeID, outputType->getLogicalTypeID()) ->execFunc; - if (inputTypeID == LogicalTypeID::STRING) { - return std::make_unique(*outputType); - } - return std::make_unique(*outputType); + return std::make_unique(*outputType); } function_set CastAnyFunction::getFunctionSet() { diff --git a/src/include/common/vector/auxiliary_buffer.h b/src/include/common/vector/auxiliary_buffer.h index 16babdcbee0..1226b9b9331 100644 --- a/src/include/common/vector/auxiliary_buffer.h +++ b/src/include/common/vector/auxiliary_buffer.h @@ -73,6 +73,7 @@ class ListAuxiliaryBuffer : public AuxiliaryBuffer { dataVector = std::move(vector); } inline ValueVector* getDataVector() const { return dataVector.get(); } + inline std::shared_ptr getSharedDataVector() const {return dataVector;} list_entry_t addList(uint64_t listSize); diff --git a/src/include/common/vector/value_vector.h b/src/include/common/vector/value_vector.h index 19baa4ff0f6..6286083deba 100644 --- a/src/include/common/vector/value_vector.h +++ b/src/include/common/vector/value_vector.h @@ -147,6 +147,11 @@ class ListVector { return reinterpret_cast(vector->auxiliaryBuffer.get()) ->getDataVector(); } + static inline std::shared_ptr getSharedDataVector(const ValueVector* vector) { + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::VAR_LIST); + return reinterpret_cast(vector->auxiliaryBuffer.get()) + ->getSharedDataVector(); + } static inline uint64_t getDataVectorSize(const ValueVector* vector) { KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::VAR_LIST); return reinterpret_cast(vector->auxiliaryBuffer.get())->getSize(); diff --git a/src/include/function/unary_function_executor.h b/src/include/function/unary_function_executor.h index 00fc750c2ae..a5bca5a964e 100644 --- a/src/include/function/unary_function_executor.h +++ b/src/include/function/unary_function_executor.h @@ -94,12 +94,13 @@ struct CastChildFunctionExecutor { static void executeSwitch(common::ValueVector& operand, common::ValueVector& result, void* dataPtr) { // this vector is of var list type and the child vector is of non-nested types then cast - KU_ASSERT(operand.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST); + KU_ASSERT(operand.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST && + result.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST); result.resetAuxiliaryBuffer(); auto childNum = common::ListVector::getDataVectorSize(&operand); + auto inputChildVector = (common::ListVector::getDataVector(&operand)); + auto resultChildVector = (common::ListVector::getDataVector(&result)); for (auto i = 0u; i < childNum; i++) { - auto inputChildVector = (common::ListVector::getDataVector(&operand)); - auto resultChildVector = (common::ListVector::getDataVector(&operand)); resultChildVector->setNull(i, inputChildVector->isNull(i)); if (!resultChildVector->isNull(i)) { // cast position i in child data vector diff --git a/test/test_files/tinysnb/cast/cast_to_nested_types.test b/test/test_files/tinysnb/cast/cast_to_nested_types.test index 6527fb5b673..9afc1e37233 100644 --- a/test/test_files/tinysnb/cast/cast_to_nested_types.test +++ b/test/test_files/tinysnb/cast/cast_to_nested_types.test @@ -124,6 +124,12 @@ False|-4325||18446744073709551616.000000| dfsa ---- 2 [3324.123047,342423.437500,432.122986] [1.000000,4231.000000,432.122986] +-STATEMENT RETURN cast("[423, 321, 423]", "INT64[3]"), cast(null, "INT64[5]"), cast("[432.43214]", "FLOAT[1]"), cast("[4, -5]", "double[2]"), cast("[4234, 42312, 432, 1321]", "INT32[4]"), cast("[-32768]", "INT16[1]") +---- 1 +[423,321,423]||[432.432129]|[4.000000,-5.000000]|[4234,42312,432,1321]|[-32768] +-STATEMENT Return cast(cast(-4324324, "int128"), "int64") +---- 1 +-4324324 -LOG CastToMap -STATEMENT RETURN cast(" { c= {a = 3423 }, b = { g = 3421 } } ", "MAP(STRING, MAP(STRING, INT16))"), cast("{}", "MAP(STRING, MAP(STRING, INT16))"), cast("{d = {}}", "MAP(STRING, MAP(STRING, INT16))"); @@ -168,3 +174,19 @@ True|2019-03-19|-12.343200|32768| 1|-2147483648|1970-01-01 10:00:00.004666|-32769.000000|fsdxcv 0|0|2014-05-12 12:11:59|4324254534123134124032.000000|fsaf False|-4325|14|18446744073709551616.000000| dfsa + +-LOG CastVarListToVarList +-STATEMENT RETURN cast([321, 432], "DOUBLE[]"), cast([321, 432], "FLOAT[]"), cast([321, 432], "INT128[]"), cast([321, 432], "INT64[]"), cast([321, 432], "INT32[]"), cast([321, 432], "INT16[]"), cast([-1, -43], "INT8[]"), cast([0, 23], "UINT8[]"), cast([0, 23], "UINT16[]"), cast([0, 23], "UINT32[]"), cast([0, 23], "UINT64[]"), cast([5435234412435123, -432425341231], "STRING[]"); +---- 1 +[321.000000,432.000000]|[321.000000,432.000000]|[321,432]|[321,432]|[321,432]|[321,432]|[-1,-43]|[0,23]|[0,23]|[0,23]|[0,23]|[5435234412435123,-432425341231] +-STATEMENT RETURN cast([], "UINT64[]"), cast([NULL,], "UINT64[]"), cast(NULL, "UINT64[]"), cast([NULL, 432124, 0, NULL], "UINT64[]"); +---- 1 +[]|[,]||[,432124,0,] + +-LOG CastNestedVarListToNestedVarList +-STATEMENT RETURN cast([[4324.2312, 432.321, 43242.543], [31214.59,4132.72], NULL, [NULL,,4324.32]], "INT64[][]"); +---- 1 +[[4324,432,43243],[31215,4133],,[,,4324]] +-STATEMENT RETURN cast(["[123, 3234]", "[124, 3241]", NULL, "[0, -4324234]"], "INT64[2][]"), cast(cast(["[123, 3234]", "[124, 3241]", NULL, "[0, -4324234]"], "DOUBLE[2][]"), "STRING[]"); +---- 1 +[[123,3234],[124,3241],,[0,-4324234]]|[[123.000000,3234.000000],[124.000000,3241.000000],,[0.000000,-4324234.000000]] diff --git a/test/test_files/tinysnb/function/cast.test b/test/test_files/tinysnb/function/cast.test index f1d3ee58da5..e173eb887a5 100644 --- a/test/test_files/tinysnb/function/cast.test +++ b/test/test_files/tinysnb/function/cast.test @@ -335,6 +335,18 @@ Hubert Blaine Wolfeschlegelsteinhausenbergerdorff [1] [10,11,12,3,4,5,6,7] +-LOG CastListOfIntsToList +-STATEMENT MATCH (p:person) RETURN cast(p.workedHours, "DOUBLE[]"), cast(p.workedHours, "FLOAT[]"), cast(p.workedHours, "INT128[]"), cast(p.workedHours, "INT64[]"), cast(p.workedHours, "INT32[]"), cast(p.workedHours, "INT16[]"), cast(p.workedHours, "INT8[]"), cast(p.workedHours, "UINT8[]"), cast(p.workedHours, "UINT16[]"), cast(p.workedHours, "UINT32[]"), cast(p.workedHours, "UINT64[]"), cast(p.workedHours, "STRING[]") +---- 9 +[10.000000,5.000000]|[10.000000,5.000000]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5] +[12.000000,8.000000]|[12.000000,8.000000]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8] +[4.000000,5.000000]|[4.000000,5.000000]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5] +[1.000000,9.000000]|[1.000000,9.000000]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9] +[2.000000]|[2.000000]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2] +[3.000000,4.000000,5.000000,6.000000,7.000000]|[3.000000,4.000000,5.000000,6.000000,7.000000]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7] +[1.000000]|[1.000000]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1] +[10.000000,11.000000,12.000000,3.000000,4.000000,5.000000,6.000000,7.000000]|[10.000000,11.000000,12.000000,3.000000,4.000000,5.000000,6.000000,7.000000]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7] +||||||||||| -LOG CastListOfListOfIntsToString -STATEMENT MATCH (p:person) RETURN string(p.courseScoresPerTerm) @@ -1083,15 +1095,6 @@ False -STATEMENT Return cast(cast(-15, "float"), "int128"), cast(cast(-1, "double"), "int128"), cast(cast(15, "float"), "int128"), cast(cast(1, "double"), "int128") ---- 1 -15|-1|15|1 - --LOG CastStringToFixedList --STATEMENT RETURN cast("[423, 321, 423]", "INT64[3]"), cast(null, "INT64[5]"), cast("[432.43214]", "FLOAT[1]"), cast("[4, -5]", "double[2]"), cast("[4234, 42312, 432, 1321]", "INT32[4]"), cast("[-32768]", "INT16[1]") ----- 1 -[423,321,423]||[432.432129]|[4.000000,-5.000000]|[4234,42312,432,1321]|[-32768] --STATEMENT Return cast(cast(-4324324, "int128"), "int64") ----- 1 --4324324 - -STATEMENT Return to_int64(to_int128(-4324324)) ---- 1 -4324324 diff --git a/test/test_files/tinysnb/function/play.test b/test/test_files/tinysnb/function/play.test new file mode 100644 index 00000000000..43b1e720727 --- /dev/null +++ b/test/test_files/tinysnb/function/play.test @@ -0,0 +1,15 @@ +-GROUP TinySnbReadTest +-DATASET CSV tinysnb + +-- + +-CASE FunctionCast +-STATEMENT RETURN cast([321, 432], "DOUBLE[]"); +---- 1 +return +-STATEMENT RETURN cast("[321, 432]", INT32[2]), cast("[321, 4321]", INT64[2]); +---- 1 +return +-STATEMENT Match(a:person) WHERE a.age > 40 Return cast(a.courseScoresPerTerm, "STRING") +---- 1 +42