diff --git a/src/common/types/ku_string.cpp b/src/common/types/ku_string.cpp index 1f81183426b..827e4823f4f 100644 --- a/src/common/types/ku_string.cpp +++ b/src/common/types/ku_string.cpp @@ -35,6 +35,18 @@ std::string ku_string_t::getAsString() const { } } +char * ku_string_t::getAsShortCString() const { + return (char*)prefix; +} + +char * ku_string_t::getAsCString() const { + if (len <= SHORT_STR_LENGTH) { + return getAsShortCString(); + } else { + return reinterpret_cast(overflowPtr); + } +} + bool ku_string_t::operator==(const ku_string_t& rhs) const { // First compare the length and prefix of the strings. auto numBytesOfLenAndPrefix = diff --git a/src/function/vector_cast_functions.cpp b/src/function/vector_cast_functions.cpp index d5af4a28880..c25d7414639 100644 --- a/src/function/vector_cast_functions.cpp +++ b/src/function/vector_cast_functions.cpp @@ -252,6 +252,8 @@ vector_function_definitions CastToDoubleVectorFunction::getDefinitions() { CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::DOUBLE)); result.push_back(bindVectorFunction( CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::DOUBLE)); + result.push_back(bindVectorFunction( + CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::DOUBLE)); return result; } @@ -273,6 +275,8 @@ vector_function_definitions CastToFloatVectorFunction::getDefinitions() { CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::FLOAT)); result.push_back(bindVectorFunction( CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::FLOAT)); + result.push_back(bindVectorFunction( + CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::FLOAT)); // down cast result.push_back(bindVectorFunction( CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::FLOAT)); @@ -314,16 +318,19 @@ vector_function_definitions CastToInt64VectorFunction::getDefinitions() { // down cast result.push_back(bindVectorFunction( CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT64)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT64)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT64)); + // down cast end result.push_back(bindVectorFunction( CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT64)); result.push_back(bindVectorFunction( CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT64)); result.push_back(bindVectorFunction( CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT64)); - result.push_back(bindVectorFunction( - CAST_TO_INT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT64)); - result.push_back(bindVectorFunction( - CAST_TO_INT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT64)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64)); return result; } @@ -340,14 +347,17 @@ vector_function_definitions CastToInt32VectorFunction::getDefinitions() { CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT32)); result.push_back(bindVectorFunction( CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT32)); - result.push_back(bindVectorFunction( - CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT32)); - result.push_back(bindVectorFunction( - CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT32)); result.push_back(bindVectorFunction( CAST_TO_INT32_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT32)); result.push_back(bindVectorFunction( CAST_TO_INT32_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT32)); + // down cast end + result.push_back(bindVectorFunction( + CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT32)); + result.push_back(bindVectorFunction( + CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT32)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64)); return result; } @@ -366,12 +376,15 @@ vector_function_definitions CastToInt16VectorFunction::getDefinitions() { CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT16)); result.push_back(bindVectorFunction( CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT16)); - result.push_back(bindVectorFunction( - CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT16)); result.push_back(bindVectorFunction( CAST_TO_INT16_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT16)); result.push_back(bindVectorFunction( CAST_TO_INT16_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT16)); + // down cast end + result.push_back(bindVectorFunction( + CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT16)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64)); return result; } @@ -386,8 +399,20 @@ vector_function_definitions CastToInt8VectorFunction::getDefinitions() { CAST_TO_INT8_FUNC_NAME, LogicalTypeID::INT64, LogicalTypeID::INT8)); result.push_back(bindVectorFunction( CAST_TO_INT8_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT8)); + result.push_back(bindVectorFunction( + CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT8)); + result.push_back(bindVectorFunction( + CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT8)); + result.push_back(bindVectorFunction( + CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT8)); + result.push_back(bindVectorFunction( + CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT8)); result.push_back(bindVectorFunction( CAST_TO_INT8_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT8)); + result.push_back(bindVectorFunction( + CAST_TO_INT16_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT8)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64)); return result; } @@ -399,7 +424,6 @@ vector_function_definitions CastToUInt64VectorFunction::getDefinitions() { CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::UINT64)); result.push_back(bindVectorFunction( CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::UINT64)); - // down cast result.push_back(bindVectorFunction( CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT8, LogicalTypeID::UINT64)); result.push_back(bindVectorFunction( @@ -408,10 +432,13 @@ vector_function_definitions CastToUInt64VectorFunction::getDefinitions() { CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT32, LogicalTypeID::UINT64)); result.push_back(bindVectorFunction( CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT64, LogicalTypeID::UINT64)); + // down cast result.push_back(bindVectorFunction( CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT64)); result.push_back(bindVectorFunction( CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT64)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64)); return result; } @@ -436,6 +463,8 @@ vector_function_definitions CastToUInt32VectorFunction::getDefinitions() { CAST_TO_UINT32_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT32)); result.push_back(bindVectorFunction( CAST_TO_UINT32_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT32)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64)); return result; } @@ -460,6 +489,8 @@ vector_function_definitions CastToUInt16VectorFunction::getDefinitions() { CAST_TO_UINT16_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT16)); result.push_back(bindVectorFunction( CAST_TO_UINT16_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT16)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64)); return result; } @@ -484,6 +515,8 @@ vector_function_definitions CastToUInt8VectorFunction::getDefinitions() { CAST_TO_UINT8_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT8)); result.push_back(bindVectorFunction( CAST_TO_UINT8_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT8)); + result.push_back(bindVectorFunction( + CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64)); return result; } diff --git a/src/include/common/type_utils.h b/src/include/common/type_utils.h index 43bba3ea571..3beec37c8aa 100644 --- a/src/include/common/type_utils.h +++ b/src/include/common/type_utils.h @@ -19,39 +19,6 @@ class StringCastUtils { public: static bool tryCastToBoolean(const char* data, uint64_t length, bool& result); static bool castToBool(const char* data, uint64_t length); - template - static bool tryCastToNum(const char* data, uint64_t length, T& result) { - auto numStr = std::string{data, length}; - removeSpace(numStr); - std::istringstream iss{numStr}; - if (iss.str().empty()) { - throw ConversionException{"Empty string."}; - } - - if constexpr (std::is_same_v) { - int val; - iss >> val; // C++ will recognize int8 as char if we don't separate this case. - result = val; - } else if constexpr (std::is_same_v) { - int val; - iss >> val; // C++ will recognize int8 as char if we don't separate this case. - result = val; - } else - iss >> result; - - if (iss.fail() || !iss.eof()) { - return false; - } - return true; - } - template - static T castToNum(const char* data, uint64_t length) { - T result; - if (!tryCastToNum(data, length, result)) { - throw ConversionException{"Invalid number: " + std::string{data} + "."}; - } - return result; - } private: static void removeSpace(std::string& str); diff --git a/src/include/common/types/ku_string.h b/src/include/common/types/ku_string.h index 451e56b20d1..22f8eb07252 100644 --- a/src/include/common/types/ku_string.h +++ b/src/include/common/types/ku_string.h @@ -55,6 +55,8 @@ struct ku_string_t { std::string getAsShortString() const; std::string getAsString() const; + char * getAsShortCString() const; + char * getAsCString() const; bool operator==(const ku_string_t& rhs) const; diff --git a/src/include/function/cast/cast_functions.h b/src/include/function/cast/cast_functions.h index 3e692e7ea5a..0d84464c520 100644 --- a/src/include/function/cast/cast_functions.h +++ b/src/include/function/cast/cast_functions.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "common/exception/runtime.h" #include "common/type_utils.h" @@ -87,13 +88,46 @@ inline std::string CastToString::castToStringWithVector( template static inline void numericDownCast(SRC& input, DST& result, const std::string& dstTypeStr) { - if (input < std::numeric_limits::min() || input > std::numeric_limits::max()) { + if ((signed) input < std::numeric_limits::min() || (signed) input > std::numeric_limits::max()) { + auto a = std::numeric_limits::min(); + auto b = std::numeric_limits::max(); throw common::RuntimeException( "Cast failed. " + std::to_string(input) + " is not in " + dstTypeStr + " range."); } result = (DST)input; } +template +static inline void stringCastNum(char* &input, const size_t& len, T& result, const std::string& type) { + if (std::from_chars(input, input + len, result).ec != std::errc{}) { + throw common::RuntimeException( + "Cast failed. " + std::string{input} + " is not in " + type + " range."); + }; +} + +template +static inline bool tryStringCastNum(const char* &input, const uint64_t& len, T& result, uint64_t start = 0) { + if (std::from_chars(input + start, input + len, result).ec != std::errc{}) { + return false; + }; + return true; +} + +template +static inline T returnStringCastToNum(const char* input, uint64_t len) { + // skip leading spaces + uint64_t start = 0; + while (start < len && isspace(input[start])) { + start++; + } + + T result; + if (!tryStringCastNum(input, len, result, start)) { + throw common::ConversionException{"Invalid number: " + std::string{input} + "."}; + } + return result; +} + struct CastToDouble { template static inline void operation(T& input, double_t& result) { @@ -101,6 +135,17 @@ struct CastToDouble { } }; +template<> +inline void CastToDouble::operation(char* & input, double_t& result) { + stringCastNum(input, strlen(input), result, "DOUBLE"); +} + +template<> +inline void CastToDouble::operation(common::ku_string_t& input, double_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); +} + struct CastToFloat { template static inline void operation(T& input, float_t& result) { @@ -108,6 +153,17 @@ struct CastToFloat { } }; +template<> +inline void CastToFloat::operation(char* & input, float_t& result) { + stringCastNum(input, strlen(input), result, "FLOAT"); +} + +template<> +inline void CastToFloat::operation(common::ku_string_t& input, float_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); +} + template<> inline void CastToFloat::operation(double_t& input, float_t& result) { numericDownCast(input, result, "FLOAT"); @@ -120,6 +176,11 @@ struct CastToInt64 { } }; +template<> +inline void CastToInt64::operation(uint64_t& input, int64_t& result) { + numericDownCast(input, result, "INT64"); +} + template<> inline void CastToInt64::operation(double_t& input, int64_t& result) { numericDownCast(input, result, "INT64"); @@ -130,6 +191,17 @@ inline void CastToInt64::operation(float_t& input, int64_t& result) { numericDownCast(input, result, "INT64"); } +template<> +inline void CastToInt64::operation(char* & input, int64_t& result) { + stringCastNum(input, strlen(input), result, "INT64"); +} + +template<> +inline void CastToInt64::operation(common::ku_string_t& input, int64_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); +} + struct CastToSerial { template static inline void operation(T& input, int64_t& result) { @@ -159,63 +231,71 @@ inline void CastToInt32::operation(int64_t& input, int32_t& result) { numericDownCast(input, result, "INT32"); } -struct CastToInt16 { - template - static inline void operation(T& input, int16_t& result) { - result = static_cast(input); - } -}; - template<> -inline void CastToInt16::operation(double_t& input, int16_t& result) { - numericDownCast(input, result, "INT16"); +inline void CastToInt32::operation(uint64_t& input, int32_t& result) { + numericDownCast(input, result, "INT32"); } template<> -inline void CastToInt16::operation(float_t& input, int16_t& result) { - numericDownCast(input, result, "INT16"); +inline void CastToInt32::operation(uint32_t& input, int32_t& result) { + numericDownCast(input, result, "INT32"); } template<> -inline void CastToInt16::operation(int64_t& input, int16_t& result) { - numericDownCast(input, result, "INT16"); +inline void CastToInt32::operation(char* & input, int32_t& result) { + stringCastNum(input, strlen(input), result, "INT32"); } template<> -inline void CastToInt16::operation(int32_t& input, int16_t& result) { - numericDownCast(input, result, "INT16"); +inline void CastToInt32::operation(common::ku_string_t& input, int32_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); } -struct CastToInt8 { +struct CastToInt16 { template - static inline void operation(T& input, int8_t& result) { - result = static_cast(input); + static inline void operation(T& input, int16_t& result) { + numericDownCast(input, result, "INT16"); } }; template<> -inline void CastToInt8::operation(double_t& input, int8_t& result) { - numericDownCast(input, result, "INT8"); +inline void CastToInt16::operation(int8_t & input, int16_t& result) { + result = static_cast(input); } template<> -inline void CastToInt8::operation(float_t& input, int8_t& result) { - numericDownCast(input, result, "INT8"); +inline void CastToInt16::operation(uint8_t & input, int16_t& result) { + result = static_cast(input); } template<> -inline void CastToInt8::operation(int64_t& input, int8_t& result) { - numericDownCast(input, result, "INT8"); +inline void CastToInt16::operation(char* & input, int16_t& result) { + stringCastNum(input, strlen(input), result, "INT16"); } template<> -inline void CastToInt8::operation(int32_t& input, int8_t& result) { - numericDownCast(input, result, "INT8"); +inline void CastToInt16::operation(common::ku_string_t& input, int16_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); } +struct CastToInt8 { + template + static inline void operation(T& input, int8_t& result) { + numericDownCast(input, result, "INT8"); + } +}; + template<> -inline void CastToInt8::operation(int16_t& input, int8_t& result) { - numericDownCast(input, result, "INT8"); +inline void CastToInt8::operation(char* & input, int8_t& result) { + stringCastNum(input, strlen(input), result, "INT8"); +} + +template<> +inline void CastToInt8::operation(common::ku_string_t& input, int8_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); } struct CastToUInt64 { @@ -235,6 +315,17 @@ inline void CastToUInt64::operation(float_t& input, uint64_t& result) { numericDownCast(input, result, "UINT64"); } +template<> +inline void CastToUInt64::operation(char* & input, uint64_t& result) { + stringCastNum(input, strlen(input), result, "UINT64"); +} + +template<> +inline void CastToUInt64::operation(common::ku_string_t& input, uint64_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); +} + struct CastToUInt32 { template static inline void operation(T& input, uint32_t& result) { @@ -262,6 +353,17 @@ inline void CastToUInt32::operation(uint64_t& input, uint32_t& result) { numericDownCast(input, result, "UINT32"); } +template<> +inline void CastToUInt32::operation(char* & input, uint32_t& result) { + stringCastNum(input, strlen(input), result, "UINT32"); +} + +template<> +inline void CastToUInt32::operation(common::ku_string_t& input, uint32_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); +} + struct CastToUInt16 { template static inline void operation(T& input, uint16_t& result) { @@ -299,6 +401,17 @@ inline void CastToUInt16::operation(uint32_t& input, uint16_t& result) { numericDownCast(input, result, "UINT16"); } +template<> +inline void CastToUInt16::operation(char* & input, uint16_t& result) { + stringCastNum(input, strlen(input), result, "UINT16"); +} + +template<> +inline void CastToUInt16::operation(common::ku_string_t& input, uint16_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); +} + struct CastToUInt8 { template static inline void operation(T& input, uint8_t& result) { @@ -346,5 +459,16 @@ inline void CastToUInt8::operation(uint16_t& input, uint8_t& result) { numericDownCast(input, result, "UINT8"); } +template<> +inline void CastToUInt8::operation(char* & input, uint8_t& result) { + stringCastNum(input, strlen(input), result, "UINT8"); +} + +template<> +inline void CastToUInt8::operation(common::ku_string_t& input, uint8_t& result) { + auto input_cstring = input.getAsCString(); + operation(input_cstring, result); +} + } // namespace function } // namespace kuzu diff --git a/src/include/storage/in_mem_storage_structure/in_mem_column_chunk.h b/src/include/storage/in_mem_storage_structure/in_mem_column_chunk.h index 9527eac04c5..b725baa140c 100644 --- a/src/include/storage/in_mem_storage_structure/in_mem_column_chunk.h +++ b/src/include/storage/in_mem_storage_structure/in_mem_column_chunk.h @@ -3,6 +3,7 @@ #include "common/types/types.h" #include "storage/storage_structure/in_mem_file.h" #include "storage/store/table_copy_utils.h" +#include "function/cast/cast_functions.h" #include #include #include @@ -52,7 +53,7 @@ class InMemColumnChunk { template void setValueFromString( const char* value, uint64_t length, common::offset_t pos, Args... args) { - auto val = common::StringCastUtils::castToNum(value, length); + auto val = function::returnStringCastToNum(value, length); setValue(val, pos); } diff --git a/src/include/storage/store/column_chunk.h b/src/include/storage/store/column_chunk.h index f343046b088..44d9b131b76 100644 --- a/src/include/storage/store/column_chunk.h +++ b/src/include/storage/store/column_chunk.h @@ -8,6 +8,7 @@ #include "storage/buffer_manager/bm_file_handle.h" #include "storage/wal/wal.h" #include "transaction/transaction.h" +#include "function/cast/cast_functions.h" namespace arrow { class Array; @@ -103,7 +104,7 @@ class ColumnChunk { template void setValueFromString(const char* value, uint64_t length, common::offset_t pos) { - auto val = common::StringCastUtils::castToNum(value, length); + auto val = function::returnStringCastToNum(value, length); setValue(val, pos); } diff --git a/src/parser/transform/transform_expression.cpp b/src/parser/transform/transform_expression.cpp index 7199e1ac8f4..6b72985cafc 100644 --- a/src/parser/transform/transform_expression.cpp +++ b/src/parser/transform/transform_expression.cpp @@ -7,6 +7,7 @@ #include "parser/expression/parsed_subquery_expression.h" #include "parser/expression/parsed_variable_expression.h" #include "parser/transformer.h" +#include "function/cast/cast_functions.h" using namespace kuzu::common; @@ -569,7 +570,7 @@ std::unique_ptr Transformer::transformIntegerLiteral( CypherParser::OC_IntegerLiteralContext& ctx) { auto text = ctx.DecimalInteger()->getText(); auto value = - std::make_unique(StringCastUtils::castToNum(text.c_str(), text.length())); + std::make_unique(function::returnStringCastToNum(text.c_str(), text.length())); return std::make_unique(std::move(value), ctx.getText()); } @@ -577,7 +578,7 @@ std::unique_ptr Transformer::transformDoubleLiteral( CypherParser::OC_DoubleLiteralContext& ctx) { auto text = ctx.RegularDecimalReal()->getText(); auto value = - std::make_unique(StringCastUtils::castToNum(text.c_str(), text.length())); + std::make_unique(function::returnStringCastToNum(text.c_str(), text.length())); return std::make_unique(std::move(value), ctx.getText()); } diff --git a/src/processor/operator/persistent/reader/csv_reader.cpp b/src/processor/operator/persistent/reader/csv_reader.cpp index fd92958517c..8114911538c 100644 --- a/src/processor/operator/persistent/reader/csv_reader.cpp +++ b/src/processor/operator/persistent/reader/csv_reader.cpp @@ -11,6 +11,7 @@ #include "common/types/blob.h" #include "common/types/value/value.h" #include "storage/store/table_copy_utils.h" +#include "function/cast/cast_functions.h" using namespace kuzu::common; @@ -114,43 +115,43 @@ void BaseCSVReader::copyStringToVector(ValueVector* vector, std::string& strVal) switch (type.getLogicalTypeID()) { case LogicalTypeID::INT64: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::INT32: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::INT16: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::INT8: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::UINT64: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::UINT32: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::UINT16: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::UINT8: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::FLOAT: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::DOUBLE: { vector->setValue( - rowToAdd, StringCastUtils::castToNum(strVal.c_str(), strVal.length())); + rowToAdd, function::returnStringCastToNum(strVal.c_str(), strVal.length())); } break; case LogicalTypeID::BOOL: { vector->setValue(rowToAdd, StringCastUtils::castToBool(strVal.c_str(), strVal.length())); diff --git a/src/storage/in_mem_storage_structure/in_mem_lists.cpp b/src/storage/in_mem_storage_structure/in_mem_lists.cpp index 6d34ee1dcb0..ab3f056f01a 100644 --- a/src/storage/in_mem_storage_structure/in_mem_lists.cpp +++ b/src/storage/in_mem_storage_structure/in_mem_lists.cpp @@ -163,7 +163,7 @@ void InMemLists::setValue(offset_t nodeOffset, uint64_t pos, uint8_t* val) { template void InMemLists::setValueFromString( offset_t nodeOffset, uint64_t pos, const char* val, uint64_t length) { - auto numericVal = StringCastUtils::castToNum(val); + auto numericVal = function::returnStringCastToNum(val); setValue(nodeOffset, pos, (uint8_t*)&numericVal); } diff --git a/src/storage/store/table_copy_utils.cpp b/src/storage/store/table_copy_utils.cpp index d8e58581d74..1ad4ea6aeb5 100644 --- a/src/storage/store/table_copy_utils.cpp +++ b/src/storage/store/table_copy_utils.cpp @@ -5,6 +5,7 @@ #include "common/exception/parser.h" #include "common/string_utils.h" #include "storage/storage_structure/lists/lists.h" +#include "function/cast/cast_functions.h" #include #include #include @@ -169,52 +170,52 @@ std::unique_ptr TableCopyUtils::getArrowFixedList(const std::string& } switch (childDataType->getLogicalTypeID()) { case LogicalTypeID::INT64: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(int64_t), &val, sizeof(int64_t)); numElementsRead++; } break; case LogicalTypeID::INT32: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(int32_t), &val, sizeof(int32_t)); numElementsRead++; } break; case LogicalTypeID::INT16: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(int16_t), &val, sizeof(int16_t)); numElementsRead++; } break; case LogicalTypeID::INT8: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(int8_t), &val, sizeof(int8_t)); numElementsRead++; } break; case LogicalTypeID::UINT64: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(uint64_t), &val, sizeof(uint64_t)); numElementsRead++; } case LogicalTypeID::UINT32: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(uint32_t), &val, sizeof(uint32_t)); numElementsRead++; } break; case LogicalTypeID::UINT16: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(uint16_t), &val, sizeof(uint16_t)); numElementsRead++; } break; case LogicalTypeID::UINT8: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(uint8_t), &val, sizeof(uint8_t)); numElementsRead++; } break; case LogicalTypeID::DOUBLE: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(double_t), &val, sizeof(double_t)); numElementsRead++; } break; case LogicalTypeID::FLOAT: { - auto val = StringCastUtils::castToNum(element.c_str(), element.length()); + auto val = function::returnStringCastToNum(element.c_str(), element.length()); memcpy(listVal.get() + numElementsRead * sizeof(float_t), &val, sizeof(float_t)); numElementsRead++; } break; @@ -300,43 +301,43 @@ bool TableCopyUtils::tryCast( } case LogicalTypeID::INT64: { int64_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, length, result); } case LogicalTypeID::INT32: { int32_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, length, result); } case LogicalTypeID::INT16: { int16_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, length, result); } case LogicalTypeID::INT8: { int8_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, length, result); } case LogicalTypeID::UINT64: { uint64_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, length, result); } case LogicalTypeID::UINT32: { uint32_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, length, result); } case LogicalTypeID::UINT16: { uint16_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, length, result); } case LogicalTypeID::UINT8: { uint8_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, length, result); } case LogicalTypeID::DOUBLE: { double_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, (size_t)length, result); } case LogicalTypeID::FLOAT: { float_t result; - return StringCastUtils::tryCastToNum(value, length, result); + return function::tryStringCastNum(value, (size_t)length, result); } case LogicalTypeID::DATE: { date_t result; @@ -378,43 +379,43 @@ std::unique_ptr TableCopyUtils::convertStringToValue( switch (type.getLogicalTypeID()) { case LogicalTypeID::INT64: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::INT32: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::INT16: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::INT8: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::UINT64: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::UINT32: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::UINT16: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::UINT8: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::FLOAT: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::DOUBLE: { value = std::make_unique( - StringCastUtils::castToNum(element.c_str(), element.length())); + function::returnStringCastToNum(element.c_str(), element.length())); } break; case LogicalTypeID::BOOL: { value = diff --git a/test/test_files/tinysnb/function/cast.test b/test/test_files/tinysnb/function/cast.test index d4d2e95acf8..ea525c99cca 100644 --- a/test/test_files/tinysnb/function/cast.test +++ b/test/test_files/tinysnb/function/cast.test @@ -397,3 +397,22 @@ Hubert Blaine Wolfeschlegelsteinhausenbergerdorff 1.510000 1.600000 1.323000 + +-LOG CastStringToNum +-STATEMENT RETURN TO_INT64("12"); +---- 1 +12 +-STATEMENT RETURN TO_INT64("9223372036854775807"); +---- 1 +9223372036854775807 +-STATEMENT RETURN TO_INT64("9223372036854775808"); +---- error +Runtime exception: Cast failed. 9223372036854775808 is not in INT64 range. +-STATEMENT RETURN TO_INT64(TO_UINT64("9223372036854775808")); +---- error +Runtime exception: Cast failed. 9223372036854775808 is not in INT64 range. + +-STATEMENT RETURN TO_FLOAT("3.294"); +---- 1 +3.294000 +