finish cast between nested list

kuzudb · Nov 13, 2023 · 1b9ac93 · 1b9ac93
1 parent 28c99e7
commit 1b9ac93
Show file tree

Hide file tree

Showing 11 changed files with 371 additions and 150 deletions.
diff --git a/src/expression_evaluator/function_evaluator.cpp b/src/expression_evaluator/function_evaluator.cpp
@@ -22,10 +22,8 @@ void FunctionExpressionEvaluator::evaluate() {
         child->evaluate();
     }
     auto expr = reinterpret_cast<binder::ScalarFunctionExpression*>(expression.get());
-    if (expr->getFunctionName() == CAST_FUNC_NAME &&
-        parameters[0]->dataType.getLogicalTypeID() == LogicalTypeID::STRING) {
-        execFunc(parameters, *resultVector,
-            reinterpret_cast<function::StringCastFunctionBindData*>(expr->getBindData()));
+    if (expr->getFunctionName() == CAST_FUNC_NAME) {
+        execFunc(parameters, *resultVector, expr->getBindData());
         return;
     }
     if (execFunc != nullptr) {

diff --git a/src/function/vector_cast_functions.cpp b/src/function/vector_cast_functions.cpp
diff --git a/src/include/common/null_mask.h b/src/include/common/null_mask.h
@@ -115,7 +115,7 @@ class NullMask {
 
     // const because updates to the data must set mayContainNulls if any value
     // becomes non-null
-    // Modifying the underlying data shuld be done with setNull or copyFromNullData
+    // Modifying the underlying data should be done with setNull or copyFromNullData
     inline const uint64_t* getData() { return data; }
 
     static inline uint64_t getNumNullEntries(uint64_t numNullBits) {

diff --git a/src/include/common/vector/auxiliary_buffer.h b/src/include/common/vector/auxiliary_buffer.h
@@ -73,6 +73,7 @@ class ListAuxiliaryBuffer : public AuxiliaryBuffer {
         dataVector = std::move(vector);
     }
     inline ValueVector* getDataVector() const { return dataVector.get(); }
+    inline std::shared_ptr<ValueVector> getSharedDataVector() const { return dataVector; }
 
     list_entry_t addList(uint64_t listSize);
 

diff --git a/src/include/common/vector/value_vector.h b/src/include/common/vector/value_vector.h
@@ -147,6 +147,11 @@ class ListVector {
         return reinterpret_cast<ListAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
             ->getDataVector();
     }
+    static inline std::shared_ptr<ValueVector> getSharedDataVector(const ValueVector* vector) {
+        KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::VAR_LIST);
+        return reinterpret_cast<ListAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
+            ->getSharedDataVector();
+    }
     static inline uint64_t getDataVectorSize(const ValueVector* vector) {
         KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::VAR_LIST);
         return reinterpret_cast<ListAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())->getSize();

diff --git a/src/include/function/cast/vector_cast_functions.h b/src/include/function/cast/vector_cast_functions.h
@@ -16,6 +16,7 @@ struct CastFunction {
     static bool hasImplicitCast(
         const common::LogicalType& srcType, const common::LogicalType& dstType);
 
+    template<typename EXECUTOR = UnaryFunctionExecutor>
     static std::unique_ptr<ScalarFunction> bindCastFunction(const std::string& functionName,
         common::LogicalTypeID sourceTypeID, common::LogicalTypeID targetTypeID);
 };

diff --git a/src/include/function/scalar_function.h b/src/include/function/scalar_function.h
@@ -92,11 +92,12 @@ struct ScalarFunction : public BaseScalarFunction {
             *params[0], *params[1], selVector);
     }
 
-    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
+    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC,
+        typename EXECUTOR = UnaryFunctionExecutor>
     static void UnaryExecFunction(const std::vector<std::shared_ptr<common::ValueVector>>& params,
         common::ValueVector& result, void* /*dataPtr*/) {
         KU_ASSERT(params.size() == 1);
-        UnaryFunctionExecutor::executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryFunctionWrapper>(
+        EXECUTOR::template executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryFunctionWrapper>(
             *params[0], result, nullptr /* dataPtr */);
     }
 
@@ -109,30 +110,33 @@ struct ScalarFunction : public BaseScalarFunction {
             UnaryStringFunctionWrapper>(*params[0], result, nullptr /* dataPtr */);
     }
 
-    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
+    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC,
+        typename EXECUTOR = UnaryFunctionExecutor>
     static void UnaryCastStringExecFunction(
         const std::vector<std::shared_ptr<common::ValueVector>>& params,
         common::ValueVector& result, void* dataPtr) {
         KU_ASSERT(params.size() == 1);
-        UnaryFunctionExecutor::executeCastString<OPERAND_TYPE, RESULT_TYPE, FUNC>(
-            *params[0], result, dataPtr);
+        EXECUTOR::template executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC,
+            UnaryCastStringFunctionWrapper>(*params[0], result, dataPtr);
     }
 
-    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
+    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC,
+        typename EXECUTOR = UnaryFunctionExecutor>
     static void UnaryCastExecFunction(
         const std::vector<std::shared_ptr<common::ValueVector>>& params,
         common::ValueVector& result, void* /*dataPtr*/ = nullptr) {
         KU_ASSERT(params.size() == 1);
-        UnaryFunctionExecutor::executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC,
-            UnaryCastFunctionWrapper>(*params[0], result, nullptr /* dataPtr */);
+        EXECUTOR::template executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryCastFunctionWrapper>(
+            *params[0], result, nullptr /* dataPtr */);
     }
 
-    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
+    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC,
+        typename EXECUTOR = UnaryFunctionExecutor>
     static void UnaryTryCastExecFunction(
         const std::vector<std::shared_ptr<common::ValueVector>>& params,
         common::ValueVector& result, void* /*dataPtr*/ = nullptr) {
         KU_ASSERT(params.size() == 1);
-        UnaryFunctionExecutor::executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC,
+        EXECUTOR::template executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC,
             UnaryTryCastFunctionWrapper>(*params[0], result, nullptr /* dataPtr */);
     }
 

diff --git a/src/include/function/unary_function_executor.h b/src/include/function/unary_function_executor.h
@@ -89,6 +89,27 @@ struct UnaryUDFFunctionWrapper {
     }
 };
 
+struct CastChildFunctionExecutor {
+    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC, typename OP_WRAPPER>
+    static void executeSwitch(
+        common::ValueVector& operand, common::ValueVector& result, void* dataPtr) {
+        // Operand and result are both VAR_LIST; cast the operand's child data vector entry by entry into the result's (child type presumably non-nested -- TODO confirm).
+        KU_ASSERT(operand.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST &&
+                  result.dataType.getLogicalTypeID() == common::LogicalTypeID::VAR_LIST);
+        auto childNum = common::ListVector::getDataVectorSize(&operand);
+        auto inputChildVector = common::ListVector::getDataVector(&operand);
+        auto resultChildVector = (common::ListVector::getDataVector(&result));
+        for (auto i = 0u; i < childNum; i++) {
+            resultChildVector->setNull(i, inputChildVector->isNull(i));
+            if (!resultChildVector->isNull(i)) {
+                // Entry i is non-null: apply the cast and write the value to the same position i of the result child vector.
+                OP_WRAPPER::template operation<OPERAND_TYPE, RESULT_TYPE, FUNC>(
+                    (void*)(inputChildVector), i, (void*)(resultChildVector), i, dataPtr);
+            }
+        }
+    }
+};
+
 struct UnaryFunctionExecutor {
     template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC, typename OP_WRAPPER>
     static void executeOnValue(common::ValueVector& inputVector, uint64_t inputPos,
@@ -152,19 +173,6 @@ struct UnaryFunctionExecutor {
             operand, result, nullptr /* dataPtr */);
     }
 
-    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
-    static void executeString(common::ValueVector& operand, common::ValueVector& result) {
-        executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryStringFunctionWrapper>(
-            operand, result, nullptr /* dataPtr */);
-    }
-
-    template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
-    static void executeCastString(
-        common::ValueVector& operand, common::ValueVector& result, void* dataPtr) {
-        executeSwitch<OPERAND_TYPE, RESULT_TYPE, FUNC, UnaryCastStringFunctionWrapper>(
-            operand, result, dataPtr);
-    }
-
     template<typename OPERAND_TYPE, typename RESULT_TYPE, typename FUNC>
     static void executeUDF(
         common::ValueVector& operand, common::ValueVector& result, void* dataPtr) {

diff --git a/test/test_files/tinysnb/cast/cast_error.test b/test/test_files/tinysnb/cast/cast_error.test
@@ -702,3 +702,14 @@ Conversion exception: Unsupported casting function from REL to TIMESTAMP.
 -STATEMENT MATCH (:person)-[e:studyAt*1..3]->(:organisation) return cast(e, "INT64");
 ---- error
 Conversion exception: Unsupported casting function from RECURSIVE_REL to INT64.
+
+-LOG InvalidVarListToVarList
+-STATEMENT RETURN cast([31231], "INT64[][]");
+---- error
+Conversion exception: Unsupported casting function from INT64 to VAR_LIST.
+-STATEMENT RETURN cast([-1], "UINT8[]");
+---- error
+Overflow exception: Value -1 is not within UINT8 range
+-STATEMENT RETURN cast([[1, 1]], "UINT8[]");
+---- error
+Conversion exception: Unsupported casting function from VAR_LIST to UINT8.
diff --git a/test/test_files/tinysnb/cast/cast_to_nested_types.test b/test/test_files/tinysnb/cast/cast_to_nested_types.test
@@ -124,6 +124,12 @@ False|-4325||18446744073709551616.000000|  dfsa
 ---- 2
 [3324.123047,342423.437500,432.122986]
 [1.000000,4231.000000,432.122986]
+-STATEMENT RETURN cast("[423, 321, 423]", "INT64[3]"), cast(null, "INT64[5]"), cast("[432.43214]", "FLOAT[1]"), cast("[4, -5]", "double[2]"), cast("[4234, 42312, 432, 1321]", "INT32[4]"), cast("[-32768]", "INT16[1]")
+---- 1
+[423,321,423]||[432.432129]|[4.000000,-5.000000]|[4234,42312,432,1321]|[-32768]
+-STATEMENT Return cast(cast(-4324324, "int128"), "int64")
+---- 1
+-4324324
 
 -LOG CastToMap
 -STATEMENT RETURN cast(" { c= {a =  3423 },  b =  {  g = 3421 }  } ", "MAP(STRING, MAP(STRING, INT16))"), cast("{}", "MAP(STRING, MAP(STRING, INT16))"), cast("{d = {}}", "MAP(STRING, MAP(STRING, INT16))");
@@ -168,3 +174,31 @@ True|2019-03-19|-12.343200|32768|
 1|-2147483648|1970-01-01 10:00:00.004666|-32769.000000|fsdxcv
 0|0|2014-05-12 12:11:59|4324254534123134124032.000000|fsaf
 False|-4325|14|18446744073709551616.000000|  dfsa
+
+-LOG CastVarListToVarList
+-STATEMENT RETURN cast([321, 432], "DOUBLE[]"), cast([321, 432], "FLOAT[]"), cast([321, 432], "INT128[]"), cast([321, 432], "INT64[]"), cast([321, 432], "INT32[]"), cast([321, 432], "INT16[]"), cast([-1, -43], "INT8[]"), cast([0, 23], "UINT8[]"), cast([0, 23], "UINT16[]"), cast([0, 23], "UINT32[]"), cast([0, 23], "UINT64[]"), cast([5435234412435123, -432425341231], "STRING[]");
+---- 1
+[321.000000,432.000000]|[321.000000,432.000000]|[321,432]|[321,432]|[321,432]|[321,432]|[-1,-43]|[0,23]|[0,23]|[0,23]|[0,23]|[5435234412435123,-432425341231]
+-STATEMENT RETURN cast([], "UINT64[]"), cast([NULL,], "UINT64[]"), cast(NULL, "UINT64[]"), cast([NULL, 432124, 0, NULL], "UINT64[]");
+---- 1
+[]|[,]||[,432124,0,]
+
+-LOG CastNestedVarListToNestedVarList
+-STATEMENT RETURN cast([[4324.2312, 432.321, 43242.543], [31214.59,4132.72], NULL, [NULL,,4324.32]], "INT64[][]");
+---- 1
+[[4324,432,43243],[31215,4133],,[,,4324]]
+-STATEMENT RETURN cast(["[123, 3234]", "[124, 3241]", NULL, "[0, -4324234]"], "INT64[2][]"), cast(cast(["[123, 3234]", "[124, 3241]", NULL, "[0, -4324234]"], "DOUBLE[2][]"), "STRING[]");
+---- 1
+[[123,3234],[124,3241],,[0,-4324234]]|[[123.000000,3234.000000],[124.000000,3241.000000],,[0.000000,-4324234.000000]]
+-STATEMENT RETURN cast([NULL, NULL, NULL], "INT8[][][]"), cast([NULL], "STRING[]"), cast([], "UINT8[]");
+---- 1
+[,,]|[]|[]
+-STATEMENT RETURN cast(cast([NULL, [NULL, 13], NULL, [14, 14], NULL], "INT32[][]"), "INT128[][]"), cast([NULL, 1], "INT16[]"), cast("[1, NULL, NULL]", "UINT32[]"), cast("[NULL, 1, NULL]", "UINT64[]");
+---- 1
+[,[,13],,[14,14],]|[,1]|[1,,]|[,1,]
+-STATEMENT RETURN cast(NULL, "INT32[][]");
+---- 1
+
+-STATEMENT RETURN cast(cast(cast(cast(["[NULL, [NULL, 1, 0, 2], NULL, [1, 2, 3, 4, 5], NULL]", "[[1, 2, 3], [4, 5, 6]]"], "UINT8[][][]"), "UINT16[][][]"), "INT32[][][]"), "DOUBLE[][][]");
+---- 1
+[[,[,1.000000,0.000000,2.000000],,[1.000000,2.000000,3.000000,4.000000,5.000000],],[[1.000000,2.000000,3.000000],[4.000000,5.000000,6.000000]]]
diff --git a/test/test_files/tinysnb/function/cast.test b/test/test_files/tinysnb/function/cast.test
@@ -335,6 +335,26 @@ Hubert Blaine Wolfeschlegelsteinhausenbergerdorff
 [1]
 [10,11,12,3,4,5,6,7]
 
+-LOG CastListOfIntsToList
+-STATEMENT MATCH (p:person) RETURN cast(p.workedHours, "DOUBLE[]"), cast(p.workedHours, "FLOAT[]"), cast(p.workedHours, "INT128[]"), cast(p.workedHours, "INT64[]"), cast(p.workedHours, "INT32[]"), cast(p.workedHours, "INT16[]"), cast(p.workedHours, "INT8[]"), cast(p.workedHours, "UINT8[]"), cast(p.workedHours, "UINT16[]"), cast(p.workedHours, "UINT32[]"), cast(p.workedHours, "UINT64[]"), cast(p.workedHours, "STRING[]")
+---- 9
+[10.000000,5.000000]|[10.000000,5.000000]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]|[10,5]
+[12.000000,8.000000]|[12.000000,8.000000]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]|[12,8]
+[4.000000,5.000000]|[4.000000,5.000000]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]|[4,5]
+[1.000000,9.000000]|[1.000000,9.000000]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]|[1,9]
+[2.000000]|[2.000000]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2]|[2]
+[3.000000,4.000000,5.000000,6.000000,7.000000]|[3.000000,4.000000,5.000000,6.000000,7.000000]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]|[3,4,5,6,7]
+[1.000000]|[1.000000]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1]|[1]
+[10.000000,11.000000,12.000000,3.000000,4.000000,5.000000,6.000000,7.000000]|[10.000000,11.000000,12.000000,3.000000,4.000000,5.000000,6.000000,7.000000]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]|[10,11,12,3,4,5,6,7]
+|||||||||||
+-STATEMENT MATCH (p:person) WHERE size(p.workedHours) > 1 RETURN cast(p.workedHours, "STRING[]")
+---- 6
+[1,9]
+[10,5]
+[12,8]
+[3,4,5,6,7]
+[4,5]
+[10,11,12,3,4,5,6,7]
 
 -LOG CastListOfListOfIntsToString
 -STATEMENT MATCH (p:person) RETURN string(p.courseScoresPerTerm)
@@ -359,6 +379,28 @@ Hubert Blaine Wolfeschlegelsteinhausenbergerdorff
 [[10]]
 [[7],[10],[6,7]]
 
+-LOG CastListOfListOfIntsToListOfLists
+-STATEMENT MATCH (p:person) RETURN cast(p.courseScoresPerTerm, "INT128[][]"), cast(p.courseScoresPerTerm, "INT64[][]"), cast(p.courseScoresPerTerm, "INT32[][]"), cast(p.courseScoresPerTerm, "INT16[][]"), cast(p.courseScoresPerTerm, "INT8[][]"), cast(p.courseScoresPerTerm, "UINT8[][]"), cast(p.courseScoresPerTerm, "UINT16[][]"), cast(p.courseScoresPerTerm, "UINT32[][]"), cast(p.courseScoresPerTerm, "UINT64[][]"), cast(p.courseScoresPerTerm, "DOUBLE[][]"), cast(p.courseScoresPerTerm, "FLOAT[][]")
+---- 9
+[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10,8],[6,7,8]]|[[10.000000,8.000000],[6.000000,7.000000,8.000000]]|[[10.000000,8.000000],[6.000000,7.000000,8.000000]]
+[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8,9],[9,10]]|[[8.000000,9.000000],[9.000000,10.000000]]|[[8.000000,9.000000],[9.000000,10.000000]]
+[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8,10]]|[[8.000000,10.000000]]|[[8.000000,10.000000]]
+[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7,4],[8,8],[9]]|[[7.000000,4.000000],[8.000000,8.000000],[9.000000]]|[[7.000000,4.000000],[8.000000,8.000000],[9.000000]]
+[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6],[7],[8]]|[[6.000000],[7.000000],[8.000000]]|[[6.000000],[7.000000],[8.000000]]
+[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8]]|[[8.000000]]|[[8.000000]]
+[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10]]|[[10.000000]]|[[10.000000]]
+[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7],[10],[6,7]]|[[7.000000],[10.000000],[6.000000,7.000000]]|[[7.000000],[10.000000],[6.000000,7.000000]]
+||||||||||
+-STATEMENT MATCH (p:person) WHERE size(p.courseScoresPerTerm) > 2 RETURN cast(p.courseScoresPerTerm, "STRING[]");
+---- 3
+[[7,4],[8,8],[9]]
+[[6],[7],[8]]
+[[7],[10],[6,7]]
+-STATEMENT MATCH (p:person) WHERE size(p.courseScoresPerTerm) > 2 RETURN cast(cast(p.courseScoresPerTerm, "INT32[][]"), "UINT8[][]");
+---- 3
+[[7,4],[8,8],[9]]
+[[6],[7],[8]]
+[[7],[10],[6,7]]
 
 -LOG CastFixedListToString
 -STATEMENT MATCH (p:person) where p.ID > 1 RETURN string(p.grades)
@@ -1083,15 +1125,6 @@ False
 -STATEMENT Return cast(cast(-15, "float"), "int128"), cast(cast(-1, "double"), "int128"), cast(cast(15, "float"), "int128"), cast(cast(1, "double"), "int128")
 ---- 1
 -15|-1|15|1
-
--LOG CastStringToFixedList
--STATEMENT RETURN cast("[423, 321, 423]", "INT64[3]"), cast(null, "INT64[5]"), cast("[432.43214]", "FLOAT[1]"), cast("[4, -5]", "double[2]"), cast("[4234, 42312, 432, 1321]", "INT32[4]"), cast("[-32768]", "INT16[1]")
----- 1
-[423,321,423]||[432.432129]|[4.000000,-5.000000]|[4234,42312,432,1321]|[-32768]
--STATEMENT Return cast(cast(-4324324, "int128"), "int64")
----- 1
--4324324
-
 -STATEMENT Return to_int64(to_int128(-4324324))
 ---- 1
 -4324324