Skip to content

Commit

Permalink
finish cast between nested list
Browse files Browse the repository at this point in the history
  • Loading branch information
AEsir777 committed Nov 13, 2023
1 parent 28c99e7 commit 1b9ac93
Show file tree
Hide file tree
Showing 11 changed files with 371 additions and 150 deletions.
6 changes: 2 additions & 4 deletions src/expression_evaluator/function_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ void FunctionExpressionEvaluator::evaluate() {
child->evaluate();
}
auto expr = reinterpret_cast<binder::ScalarFunctionExpression*>(expression.get());
if (expr->getFunctionName() == CAST_FUNC_NAME &&
parameters[0]->dataType.getLogicalTypeID() == LogicalTypeID::STRING) {
execFunc(parameters, *resultVector,
reinterpret_cast<function::StringCastFunctionBindData*>(expr->getBindData()));
if (expr->getFunctionName() == CAST_FUNC_NAME) {
execFunc(parameters, *resultVector, expr->getBindData());
return;
}
if (execFunc != nullptr) {
Expand Down
352 changes: 239 additions & 113 deletions src/function/vector_cast_functions.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/include/common/null_mask.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ class NullMask {

// const because updates to the data must set mayContainNulls if any value
// becomes non-null
// Modifying the underlying data shuld be done with setNull or copyFromNullData
// Modifying the underlying data should be done with setNull or copyFromNullData
// Hands back the `data` member as a pointer-to-const: callers can read through it
// but cannot write through it.
inline const uint64_t* getData() {
    const uint64_t* readOnlyData = data;
    return readOnlyData;
}

static inline uint64_t getNumNullEntries(uint64_t numNullBits) {
Expand Down
1 change: 1 addition & 0 deletions src/include/common/vector/auxiliary_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class ListAuxiliaryBuffer : public AuxiliaryBuffer {
dataVector = std::move(vector);
}
// Non-owning accessor: yields the raw pointer currently held by `dataVector`.
inline ValueVector* getDataVector() const {
    ValueVector* rawVector = dataVector.get();
    return rawVector;
}
// Returns a copy of the `dataVector` shared_ptr (not a raw pointer), so the caller
// shares ownership of the data vector.
inline std::shared_ptr<ValueVector> getSharedDataVector() const {
    std::shared_ptr<ValueVector> sharedVector = dataVector;
    return sharedVector;
}

list_entry_t addList(uint64_t listSize);

Expand Down
5 changes: 5 additions & 0 deletions src/include/common/vector/value_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ class ListVector {
return reinterpret_cast<ListAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
->getDataVector();
}
// Returns the data vector of a list vector as a shared_ptr, by forwarding to
// ListAuxiliaryBuffer::getSharedDataVector() on the vector's auxiliary buffer.
// Asserts that `vector` has VAR_LIST physical type.
static inline std::shared_ptr<ValueVector> getSharedDataVector(const ValueVector* vector) {
    KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::VAR_LIST);
    auto* listBuffer = reinterpret_cast<ListAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
    return listBuffer->getSharedDataVector();
}
static inline uint64_t getDataVectorSize(const ValueVector* vector) {
KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::VAR_LIST);
return reinterpret_cast<ListAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())->getSize();
Expand Down
1 change: 1 addition & 0 deletions src/include/function/cast/vector_cast_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ struct CastFunction {
static bool hasImplicitCast(
const common::LogicalType& srcType, const common::LogicalType& dstType);

template<typename EXECUTOR = UnaryFunctionExecutor>
static std::unique_ptr<ScalarFunction> bindCastFunction(const std::string& functionName,
common::LogicalTypeID sourceTypeID, common::LogicalTypeID targetTypeID);
};
Expand Down
24 changes: 14 additions & 10 deletions src/include/function/scalar_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,12 @@ struct ScalarFunction : public BaseScalarFunction {
*params[0], *params[1], selVector);
}

template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC,
typename EXECUTOR = UnaryFunctionExecutor>
static void UnaryExecFunction(const std::vector<std::shared_ptr<common::ValueVector>>& params,
common::ValueVector& result, void* /*dataPtr*/) {
KU_ASSERT(params.size() == 1);
UnaryFunctionExecutor::executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryFunctionWrapper>(
EXECUTOR::template executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryFunctionWrapper>(
*params[0], result, nullptr /* dataPtr */);
}

Expand All @@ -109,30 +110,33 @@ struct ScalarFunction : public BaseScalarFunction {
UnaryStringFunctionWrapper>(*params[0], result, nullptr /* dataPtr */);
}

template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC,
typename EXECUTOR = UnaryFunctionExecutor>
static void UnaryCastStringExecFunction(
const std::vector<std::shared_ptr<common::ValueVector>>& params,
common::ValueVector& result, void* dataPtr) {
KU_ASSERT(params.size() == 1);
UnaryFunctionExecutor::executeCastString<OPERAND_TYPE, RESULT_TYPE, FUNC>(
*params[0], result, dataPtr);
EXECUTOR::template executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC,
UnaryCastStringFunctionWrapper>(*params[0], result, dataPtr);
}

template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC,
typename EXECUTOR = UnaryFunctionExecutor>
static void UnaryCastExecFunction(
const std::vector<std::shared_ptr<common::ValueVector>>& params,
common::ValueVector& result, void* /*dataPtr*/ = nullptr) {
KU_ASSERT(params.size() == 1);
UnaryFunctionExecutor::executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC,
UnaryCastFunctionWrapper>(*params[0], result, nullptr /* dataPtr */);
EXECUTOR::template executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryCastFunctionWrapper>(
*params[0], result, nullptr /* dataPtr */);
}

template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC,
typename EXECUTOR = UnaryFunctionExecutor>
static void UnaryTryCastExecFunction(
const std::vector<std::shared_ptr<common::ValueVector>>& params,
common::ValueVector& result, void* /*dataPtr*/ = nullptr) {
KU_ASSERT(params.size() == 1);
UnaryFunctionExecutor::executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC,
EXECUTOR::template executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC,
UnaryTryCastFunctionWrapper>(*params[0], result, nullptr /* dataPtr */);
}

Expand Down
34 changes: 21 additions & 13 deletions src/include/function/unary_function_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,27 @@ struct UnaryUDFFunctionWrapper {
}
};

// Executor used for VAR_LIST -> VAR_LIST casts: instead of iterating over the list
// vectors themselves, it applies the element-level operation to every position of
// their child data vectors.
// NOTE(review): nothing in this struct writes the list entries of `result` or sizes its
// data vector; presumably the caller prepares those beforehand -- confirm.
struct CastChildFunctionExecutor {
template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC, typename OP_WRAPPER>
static void executeSwitch(
common::ValueVector& operand, common::ValueVector& result, void* dataPtr) {
// Both vectors must be VAR_LIST. Per the original note, this path is meant for the case
// where the child vector holds non-nested elements, which are cast one by one below.
KU_ASSERT(operand.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST &&
result.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST);
// The loop bound is the size of the operand's data vector, i.e. it walks child
// positions, not list entries.
auto childNum = common::ListVector::getDataVectorSize(&operand);
auto inputChildVector = common::ListVector::getDataVector(&operand);
auto resultChildVector = (common::ListVector::getDataVector(&result));
for (auto i = 0u; i < childNum; i++) {
// Copy the null flag first; only non-null positions are handed to the cast operation.
resultChildVector->setNull(i, inputChildVector->isNull(i));
if (!resultChildVector->isNull(i)) {
// Cast position i of the input child vector into position i of the result child vector;
// `dataPtr` is forwarded unchanged to the operation.
OP_WRAPPER::template operation<OPERAND_TYPE, RESULT_TYPE, FUNC>(

Check warning on line 106 in src/include/function/unary_function_executor.h

View check run for this annotation

Codecov / codecov/patch

src/include/function/unary_function_executor.h#L106

Added line #L106 was not covered by tests
(void*)(inputChildVector), i, (void*)(resultChildVector), i, dataPtr);
}
}
}
};

struct UnaryFunctionExecutor {
template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC, typename OP_WRAPPER>
static void executeOnValue(common::ValueVector& inputVector, uint64_t inputPos,
Expand Down Expand Up @@ -152,19 +173,6 @@ struct UnaryFunctionExecutor {
operand, result, nullptr /* dataPtr */);
}

template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
static void executeString(common::ValueVector& operand, common::ValueVector& result) {
executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryStringFunctionWrapper>(
operand, result, nullptr /* dataPtr */);
}

template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
static void executeCastString(
common::ValueVector& operand, common::ValueVector& result, void* dataPtr) {
executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryCastStringFunctionWrapper>(
operand, result, dataPtr);
}

template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
static void executeUDF(
common::ValueVector& operand, common::ValueVector& result, void* dataPtr) {
Expand Down
11 changes: 11 additions & 0 deletions test/test_files/tinysnb/cast/cast_error.test
Original file line number Diff line number Diff line change
Expand Up @@ -702,3 +702,14 @@ Conversion exception: Unsupported casting function from REL to TIMESTAMP.
-STATEMENT MATCH (:person)-[e:studyAt*1..3]->(:organisation) return cast(e, "INT64");
---- error
Conversion exception: Unsupported casting function from RECURSIVE_REL to INT64.

-LOG InvalidVarListToVarList
-STATEMENT RETURN cast([31231], "INT64[][]");
---- error
Conversion exception: Unsupported casting function from INT64 to VAR_LIST.
-STATEMENT RETURN cast([-1], "UINT8[]");
---- error
Overflow exception: Value -1 is not within UINT8 range
-STATEMENT RETURN cast([[1, 1]], "UINT8[]");
---- error
Conversion exception: Unsupported casting function from VAR_LIST to UINT8.
34 changes: 34 additions & 0 deletions test/test_files/tinysnb/cast/cast_to_nested_types.test
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ False|-4325||18446744073709551616.000000| dfsa
---- 2
[3324.123047,342423.437500,432.122986]
[1.000000,4231.000000,432.122986]
-STATEMENT RETURN cast("[423, 321, 423]", "INT64[3]"), cast(null, "INT64[5]"), cast("[432.43214]", "FLOAT[1]"), cast("[4, -5]", "double[2]"), cast("[4234, 42312, 432, 1321]", "INT32[4]"), cast("[-32768]", "INT16[1]")
---- 1
[423,321,423]||[432.432129]|[4.000000,-5.000000]|[4234,42312,432,1321]|[-32768]
-STATEMENT Return cast(cast(-4324324, "int128"), "int64")
---- 1
-4324324

-LOG CastToMap
-STATEMENT RETURN cast(" { c= {a = 3423 }, b = { g = 3421 } } ", "MAP(STRING, MAP(STRING, INT16))"), cast("{}", "MAP(STRING, MAP(STRING, INT16))"), cast("{d = {}}", "MAP(STRING, MAP(STRING, INT16))");
Expand Down Expand Up @@ -168,3 +174,31 @@ True|2019-03-19|-12.343200|32768|
1|-2147483648|1970-01-01 10:00:00.004666|-32769.000000|fsdxcv
0|0|2014-05-12 12:11:59|4324254534123134124032.000000|fsaf
False|-4325|14|18446744073709551616.000000| dfsa

-LOG CastVarListToVarList
-STATEMENT RETURN cast([321, 432], "DOUBLE[]"), cast([321, 432], "FLOAT[]"), cast([321, 432], "INT128[]"), cast([321, 432], "INT64[]"), cast([321, 432], "INT32[]"), cast([321, 432], "INT16[]"), cast([-1, -43], "INT8[]"), cast([0, 23], "UINT8[]"), cast([0, 23], "UINT16[]"), cast([0, 23], "UINT32[]"), cast([0, 23], "UINT64[]"), cast([5435234412435123, -432425341231], "STRING[]");
---- 1
[321.000000,432.000000]|[321.000000,432.000000]|[321,432]|[321,432]|[321,432]|[321,432]|[-1,-43]|[0,23]|[0,23]|[0,23]|[0,23]|[5435234412435123,-432425341231]
-STATEMENT RETURN cast([], "UINT64[]"), cast([NULL,], "UINT64[]"), cast(NULL, "UINT64[]"), cast([NULL, 432124, 0, NULL], "UINT64[]");
---- 1
[]|[,]||[,432124,0,]

-LOG CastNestedVarListToNestedVarList
-STATEMENT RETURN cast([[4324.2312, 432.321, 43242.543], [31214.59,4132.72], NULL, [NULL,,4324.32]], "INT64[][]");
---- 1
[[4324,432,43243],[31215,4133],,[,,4324]]
-STATEMENT RETURN cast(["[123, 3234]", "[124, 3241]", NULL, "[0, -4324234]"], "INT64[2][]"), cast(cast(["[123, 3234]", "[124, 3241]", NULL, "[0, -4324234]"], "DOUBLE[2][]"), "STRING[]");
---- 1
[[123,3234],[124,3241],,[0,-4324234]]|[[123.000000,3234.000000],[124.000000,3241.000000],,[0.000000,-4324234.000000]]
-STATEMENT RETURN cast([NULL, NULL, NULL], "INT8[][][]"), cast([NULL], "STRING[]"), cast([], "UINT8[]");
---- 1
[,,]|[]|[]
-STATEMENT RETURN cast(cast([NULL, [NULL, 13], NULL, [14, 14], NULL], "INT32[][]"), "INT128[][]"), cast([NULL, 1], "INT16[]"), cast("[1, NULL, NULL]", "UINT32[]"), cast("[NULL, 1, NULL]", "UINT64[]");
---- 1
[,[,13],,[14,14],]|[,1]|[1,,]|[,1,]
-STATEMENT RETURN cast(NULL, "INT32[][]");
---- 1

-STATEMENT RETURN cast(cast(cast(cast(["[NULL, [NULL, 1, 0, 2], NULL, [1, 2, 3, 4, 5], NULL]", "[[1, 2, 3], [4, 5, 6]]"], "UINT8[][][]"), "UINT16[][][]"), "INT32[][][]"), "DOUBLE[][][]");
---- 1
[[,[,1.000000,0.000000,2.000000],,[1.000000,2.000000,3.000000,4.000000,5.000000],],[[1.000000,2.000000,3.000000],[4.000000,5.000000,6.000000]]]
51 changes: 42 additions & 9 deletions test/test_files/tinysnb/function/cast.test
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,26 @@ Hubert Blaine Wolfeschlegelsteinhausenbergerdorff
[1]
[10,11,12,3,4,5,6,7]

-LOG CastListOfIntsToList
-STATEMENT MATCH (p:person) RETURN cast(p.workedHours, "DOUBLE[]"), cast(p.workedHours, "FLOAT[]"), cast(p.workedHours, "INT128[]"), cast(p.workedHours, "INT64[]"), cast(p.workedHours, "INT32[]"), cast(p.workedHours, "INT16[]"), cast(p.workedHours, "INT8[]"), cast(p.workedHours, "UINT8[]"), cast(p.workedHours, "UINT16[]"), cast(p.workedHours, "UINT32[]"), cast(p.workedHours, "UINT64[]"), cast(p.workedHours, "STRING[]")
---- 9
[10.000000,5.000000]|[10.000000,5.000000]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]
[12.000000,8.000000]|[12.000000,8.000000]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]
[4.000000,5.000000]|[4.000000,5.000000]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]
[1.000000,9.000000]|[1.000000,9.000000]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]
[2.000000]|[2.000000]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2]
[3.000000,4.000000,5.000000,6.000000,7.000000]|[3.000000,4.000000,5.000000,6.000000,7.000000]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]
[1.000000]|[1.000000]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1]
[10.000000,11.000000,12.000000,3.000000,4.000000,5.000000,6.000000,7.000000]|[10.000000,11.000000,12.000000,3.000000,4.000000,5.000000,6.000000,7.000000]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]
|||||||||||
-STATEMENT MATCH (p:person) WHERE size(p.workedHours) > 1 RETURN cast(p.workedHours, "STRING[]")
---- 6
[1,9]
[10,5]
[12,8]
[3,4,5,6,7]
[4,5]
[10,11,12,3,4,5,6,7]

-LOG CastListOfListOfIntsToString
-STATEMENT MATCH (p:person) RETURN string(p.courseScoresPerTerm)
Expand All @@ -359,6 +379,28 @@ Hubert Blaine Wolfeschlegelsteinhausenbergerdorff
[[10]]
[[7],[10],[6,7]]

-LOG CastListOfListOfIntsToListOfLists
-STATEMENT MATCH (p:person) RETURN cast(p.courseScoresPerTerm, "INT128[][]"), cast(p.courseScoresPerTerm, "INT64[][]"), cast(p.courseScoresPerTerm, "INT32[][]"), cast(p.courseScoresPerTerm, "INT16[][]"), cast(p.courseScoresPerTerm, "INT8[][]"), cast(p.courseScoresPerTerm, "UINT8[][]"), cast(p.courseScoresPerTerm, "UINT16[][]"), cast(p.courseScoresPerTerm, "UINT32[][]"), cast(p.courseScoresPerTerm, "UINT64[][]"), cast(p.courseScoresPerTerm, "DOUBLE[][]"), cast(p.courseScoresPerTerm, "FLOAT[][]")
---- 9
[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10.000000,8.000000],[6.000000,7.000000,8.000000]]|[[10.000000,8.000000],[6.000000,7.000000,8.000000]]
[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8.000000,9.000000],[9.000000,10.000000]]|[[8.000000,9.000000],[9.000000,10.000000]]
[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8.000000,10.000000]]|[[8.000000,10.000000]]
[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7.000000,4.000000],[8.000000,8.000000],[9.000000]]|[[7.000000,4.000000],[8.000000,8.000000],[9.000000]]
[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6.000000],[7.000000],[8.000000]]|[[6.000000],[7.000000],[8.000000]]
[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8.000000]]|[[8.000000]]
[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10.000000]]|[[10.000000]]
[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7.000000],[10.000000],[6.000000,7.000000]]|[[7.000000],[10.000000],[6.000000,7.000000]]
||||||||||
-STATEMENT MATCH (p:person) WHERE size(p.courseScoresPerTerm) > 2 RETURN cast(p.courseScoresPerTerm, "STRING[]");
---- 3
[[7,4],[8,8],[9]]
[[6],[7],[8]]
[[7],[10],[6,7]]
-STATEMENT MATCH (p:person) WHERE size(p.courseScoresPerTerm) > 2 RETURN cast(cast(p.courseScoresPerTerm, "INT32[][]"), "UINT8[][]");
---- 3
[[7,4],[8,8],[9]]
[[6],[7],[8]]
[[7],[10],[6,7]]

-LOG CastFixedListToString
-STATEMENT MATCH (p:person) where p.ID > 1 RETURN string(p.grades)
Expand Down Expand Up @@ -1083,15 +1125,6 @@ False
-STATEMENT Return cast(cast(-15, "float"), "int128"), cast(cast(-1, "double"), "int128"), cast(cast(15, "float"), "int128"), cast(cast(1, "double"), "int128")
---- 1
-15|-1|15|1

-LOG CastStringToFixedList
-STATEMENT RETURN cast("[423, 321, 423]", "INT64[3]"), cast(null, "INT64[5]"), cast("[432.43214]", "FLOAT[1]"), cast("[4, -5]", "double[2]"), cast("[4234, 42312, 432, 1321]", "INT32[4]"), cast("[-32768]", "INT16[1]")
---- 1
[423,321,423]||[432.432129]|[4.000000,-5.000000]|[4234,42312,432,1321]|[-32768]
-STATEMENT Return cast(cast(-4324324, "int128"), "int64")
---- 1
-4324324

-STATEMENT Return to_int64(to_int128(-4324324))
---- 1
-4324324

0 comments on commit 1b9ac93

Please sign in to comment.