Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add castString in functions #2092

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ include_directories(third_party/re2/include)
include_directories(third_party/concurrentqueue)
include_directories(third_party/serd/include)
include_directories(third_party/miniparquet/src)
include_directories(third_party/fast_float/include)

add_subdirectory(third_party)
add_subdirectory(src)
Expand Down
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ add_subdirectory(transaction)

add_library(kuzu STATIC ${ALL_OBJECT_FILES})
target_link_libraries(kuzu
PUBLIC antlr4_cypher antlr4_runtime utf8proc re2 serd ${PARQUET_LIB} ${ARROW_LIB} Threads::Threads fastpfor miniparquet)
PUBLIC antlr4_cypher antlr4_runtime fast_float utf8proc re2 serd ${PARQUET_LIB} ${ARROW_LIB} Threads::Threads fastpfor miniparquet)
target_include_directories(kuzu
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
add_library(kuzu_shared SHARED ${ALL_OBJECT_FILES})
Expand All @@ -24,6 +24,6 @@ else()
set_target_properties(kuzu_shared PROPERTIES OUTPUT_NAME kuzu)
endif()
target_link_libraries(kuzu_shared
PUBLIC antlr4_cypher antlr4_runtime utf8proc re2 serd ${PARQUET_LIB} ${ARROW_LIB} Threads::Threads fastpfor miniparquet)
PUBLIC antlr4_cypher antlr4_runtime fast_float utf8proc re2 serd ${PARQUET_LIB} ${ARROW_LIB} Threads::Threads fastpfor miniparquet)
target_include_directories(kuzu_shared
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
11 changes: 11 additions & 0 deletions src/common/string_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ std::vector<std::string> StringUtils::splitBySpace(const std::string& input) {
return result;
}

// Trims whitespace (as classified by isspace) from both ends of the character range
// [input, input + len) without copying or modifying the underlying bytes.
//
// @param input  in/out: pointer to the first character; advanced past leading whitespace.
// @param len    in/out: number of characters in the range; reduced by the number of
//               leading and trailing whitespace characters skipped.
//
// If the range consists only of whitespace (or is empty), len ends up as 0.
void StringUtils::removeCStringWhiteSpaces(const char*& input, uint64_t& len) {
    // isspace() requires a value representable as unsigned char (or EOF). Passing a plain
    // char is undefined behaviour for bytes >= 0x80 where char is signed (e.g. UTF-8 data),
    // so convert explicitly before classifying.
    while (len > 0 && isspace(static_cast<unsigned char>(input[0]))) {
        input++;
        len--;
    }
    while (len > 0 && isspace(static_cast<unsigned char>(input[len - 1]))) {
        len--;
    }
}

void StringUtils::replaceAll(
std::string& str, const std::string& search, const std::string& replacement) {
size_t pos = 0;
Expand Down
55 changes: 44 additions & 11 deletions src/function/vector_cast_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ vector_function_definitions CastToDoubleVectorFunction::getDefinitions() {
CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::DOUBLE));
result.push_back(bindVectorFunction<float_t, double_t, CastToDouble>(
CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::DOUBLE));
result.push_back(bindVectorFunction<ku_string_t, double_t, CastToDouble>(
CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::DOUBLE));
return result;
}

Expand All @@ -273,6 +275,8 @@ vector_function_definitions CastToFloatVectorFunction::getDefinitions() {
CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::FLOAT));
result.push_back(bindVectorFunction<uint8_t, float_t, CastToFloat>(
CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::FLOAT));
result.push_back(bindVectorFunction<ku_string_t, float_t, CastToFloat>(
CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::FLOAT));
// down cast
result.push_back(bindVectorFunction<double_t, float_t, CastToFloat>(
CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::FLOAT));
Expand Down Expand Up @@ -314,16 +318,19 @@ vector_function_definitions CastToInt64VectorFunction::getDefinitions() {
// down cast
result.push_back(bindVectorFunction<uint64_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<float_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<double_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT64));
// down cast end
result.push_back(bindVectorFunction<uint32_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<uint16_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<uint8_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<float_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<double_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<ku_string_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64));
return result;
}

Expand All @@ -340,14 +347,17 @@ vector_function_definitions CastToInt32VectorFunction::getDefinitions() {
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<uint32_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<uint16_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<uint8_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<float_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<double_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT32));
// down cast end
result.push_back(bindVectorFunction<uint16_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<uint8_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<ku_string_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT32));
return result;
}

Expand All @@ -366,12 +376,15 @@ vector_function_definitions CastToInt16VectorFunction::getDefinitions() {
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<uint16_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<uint8_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<float_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<double_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT16));
// down cast end
result.push_back(bindVectorFunction<uint8_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<ku_string_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT16));
return result;
}

Expand All @@ -386,8 +399,20 @@ vector_function_definitions CastToInt8VectorFunction::getDefinitions() {
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::INT64, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<float_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<uint64_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<uint32_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<uint16_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<uint8_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<double_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<float_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<ku_string_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT8));
return result;
}

Expand All @@ -399,7 +424,6 @@ vector_function_definitions CastToUInt64VectorFunction::getDefinitions() {
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<uint32_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::UINT64));
// down cast
result.push_back(bindVectorFunction<int8_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT8, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<int16_t, uint64_t, CastToUInt64>(
Expand All @@ -408,10 +432,13 @@ vector_function_definitions CastToUInt64VectorFunction::getDefinitions() {
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT32, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<int64_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT64, LogicalTypeID::UINT64));
// down cast
result.push_back(bindVectorFunction<float_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<double_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<ku_string_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::UINT64));
return result;
}

Expand All @@ -436,6 +463,8 @@ vector_function_definitions CastToUInt32VectorFunction::getDefinitions() {
CAST_TO_UINT32_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT32));
result.push_back(bindVectorFunction<double_t, uint32_t, CastToUInt32>(
CAST_TO_UINT32_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT32));
result.push_back(bindVectorFunction<ku_string_t, uint32_t, CastToUInt32>(
CAST_TO_UINT32_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::UINT32));
return result;
}

Expand All @@ -460,6 +489,8 @@ vector_function_definitions CastToUInt16VectorFunction::getDefinitions() {
CAST_TO_UINT16_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT16));
result.push_back(bindVectorFunction<double_t, uint16_t, CastToUInt16>(
CAST_TO_UINT16_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT16));
result.push_back(bindVectorFunction<ku_string_t, uint16_t, CastToUInt16>(
CAST_TO_UINT16_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::UINT16));
return result;
}

Expand All @@ -484,6 +515,8 @@ vector_function_definitions CastToUInt8VectorFunction::getDefinitions() {
CAST_TO_UINT8_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT8));
result.push_back(bindVectorFunction<double_t, uint8_t, CastToUInt8>(
CAST_TO_UINT8_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT8));
result.push_back(bindVectorFunction<ku_string_t, uint8_t, CastToUInt8>(
CAST_TO_UINT8_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::UINT8));
return result;
}

Expand Down
2 changes: 2 additions & 0 deletions src/include/common/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ class StringUtils {
str = std::regex_replace(str, whiteSpacePattern, "");
}

static void removeCStringWhiteSpaces(const char*& input, uint64_t& len);

static void replaceAll(
std::string& str, const std::string& search, const std::string& replacement);

Expand Down
33 changes: 0 additions & 33 deletions src/include/common/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,39 +19,6 @@ class StringCastUtils {
public:
static bool tryCastToBoolean(const char* data, uint64_t length, bool& result);
static bool castToBool(const char* data, uint64_t length);
// Attempts to parse the `length` bytes starting at `data` as a number of type T.
//
// The text is copied into a std::string, passed through removeSpace(), and then read
// with a std::istringstream.
//
// @param data    pointer to the characters to parse (need not be null-terminated).
// @param length  number of characters to read from `data`.
// @param result  receives the parsed value when the function returns true.
// @return true only if extraction succeeded, the whole remaining text was consumed, and
//         (for 8-bit targets) the value fits in T; false otherwise.
// @throws ConversionException if the text is empty after removeSpace().
template<typename T>
static bool tryCastToNum(const char* data, uint64_t length, T& result) {
    auto numStr = std::string{data, length};
    removeSpace(numStr);
    if (numStr.empty()) {
        throw ConversionException{"Empty string."};
    }
    std::istringstream iss{numStr};

    if constexpr (std::is_same_v<int8_t, T> || std::is_same_v<uint8_t, T>) {
        // Stream extraction treats 8-bit integer types as characters, so parse into an int.
        int val = 0;
        iss >> val;
        const T narrowed = static_cast<T>(val);
        // Reject values that do not survive the round trip (e.g. "300" for int8_t or "-1"
        // for uint8_t) instead of silently wrapping and reporting success.
        if (static_cast<int>(narrowed) != val) {
            return false;
        }
        result = narrowed;
    } else {
        iss >> result;
    }

    // Success requires a clean extraction that consumed every character.
    return !iss.fail() && iss.eof();
}
// Parses the `length` bytes at `data` as a number of type T via tryCastToNum().
//
// @return the parsed value when tryCastToNum() reports success.
// @throws ConversionException with the offending text when tryCastToNum() returns false
//         (tryCastToNum() may itself throw for empty input).
template<typename T>
static T castToNum(const char* data, uint64_t length) {
    T parsed;
    const bool ok = tryCastToNum(data, length, parsed);
    if (ok) {
        return parsed;
    }
    throw ConversionException{"Invalid number: " + std::string{data, length} + "."};
}

private:
static void removeSpace(std::string& str);
Expand Down
Loading
Loading