Skip to content

Commit

Permalink
finish string cast to num
Browse files Browse the repository at this point in the history
  • Loading branch information
AEsir777 committed Sep 29, 2023
1 parent d4b0f19 commit af4bd0b
Show file tree
Hide file tree
Showing 19 changed files with 3,018 additions and 116 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ include_directories(third_party/re2/include)
include_directories(third_party/concurrentqueue)
include_directories(third_party/serd/include)
include_directories(third_party/miniparquet/src)
include_directories(third_party/fast_float/include)

add_subdirectory(third_party)
add_subdirectory(src)
Expand Down
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ add_subdirectory(transaction)

add_library(kuzu STATIC ${ALL_OBJECT_FILES})
target_link_libraries(kuzu
PUBLIC antlr4_cypher antlr4_runtime utf8proc re2 serd ${PARQUET_LIB} ${ARROW_LIB} Threads::Threads fastpfor miniparquet)
PUBLIC antlr4_cypher antlr4_runtime fast_float utf8proc re2 serd ${PARQUET_LIB} ${ARROW_LIB} Threads::Threads fastpfor miniparquet)
target_include_directories(kuzu
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
add_library(kuzu_shared SHARED ${ALL_OBJECT_FILES})
Expand All @@ -24,6 +24,6 @@ else()
set_target_properties(kuzu_shared PROPERTIES OUTPUT_NAME kuzu)
endif()
target_link_libraries(kuzu_shared
PUBLIC antlr4_cypher antlr4_runtime utf8proc re2 serd ${PARQUET_LIB} ${ARROW_LIB} Threads::Threads fastpfor miniparquet)
PUBLIC antlr4_cypher antlr4_runtime fast_float utf8proc re2 serd ${PARQUET_LIB} ${ARROW_LIB} Threads::Threads fastpfor miniparquet)
target_include_directories(kuzu_shared
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
15 changes: 15 additions & 0 deletions src/common/string_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ std::vector<std::string> StringUtils::splitBySpace(const std::string& input) {
return result;
}

/**
 * Trims leading and trailing whitespace from the character range [input, input + len)
 * without copying: `input` is advanced past the leading whitespace and `len` is set to the
 * number of characters remaining between the first and last non-whitespace character.
 *
 * An empty or all-whitespace range yields `len == 0`.
 *
 * @param input in/out pointer to the first character of the range.
 * @param len   in/out number of characters in the range.
 */
void StringUtils::removeCStringWhiteSpaces(const char*& input, uint64_t& len) {
    // isspace() is undefined for negative values, so always go through unsigned char.
    const auto isWhiteSpace = [](char c) { return isspace(static_cast<unsigned char>(c)) != 0; };

    // Scan against the original length; `len` is only rewritten once both ends are known.
    uint64_t start = 0;
    while (start < len && isWhiteSpace(input[start])) {
        start++;
    }
    // `end` is one-past-the-last kept character, so len == 0 never underflows.
    uint64_t end = len;
    while (end > start && isWhiteSpace(input[end - 1])) {
        end--;
    }
    input += start;
    len = end - start;
}

void StringUtils::replaceAll(
std::string& str, const std::string& search, const std::string& replacement) {
size_t pos = 0;
Expand Down
55 changes: 44 additions & 11 deletions src/function/vector_cast_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ vector_function_definitions CastToDoubleVectorFunction::getDefinitions() {
CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::DOUBLE));
result.push_back(bindVectorFunction<float_t, double_t, CastToDouble>(
CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::DOUBLE));
result.push_back(bindVectorFunction<ku_string_t, double_t, CastToDouble>(
CAST_TO_DOUBLE_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::DOUBLE));
return result;
}

Expand All @@ -273,6 +275,8 @@ vector_function_definitions CastToFloatVectorFunction::getDefinitions() {
CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::FLOAT));
result.push_back(bindVectorFunction<uint8_t, float_t, CastToFloat>(
CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::FLOAT));
result.push_back(bindVectorFunction<ku_string_t, float_t, CastToFloat>(
CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::FLOAT));
// down cast
result.push_back(bindVectorFunction<double_t, float_t, CastToFloat>(
CAST_TO_FLOAT_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::FLOAT));
Expand Down Expand Up @@ -314,16 +318,19 @@ vector_function_definitions CastToInt64VectorFunction::getDefinitions() {
// down cast
result.push_back(bindVectorFunction<uint64_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<float_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<double_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT64));
// down cast end
result.push_back(bindVectorFunction<uint32_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<uint16_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<uint8_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<float_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<double_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT64));
result.push_back(bindVectorFunction<ku_string_t, int64_t, CastToInt64>(
CAST_TO_INT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT64));
return result;
}

Expand All @@ -340,14 +347,17 @@ vector_function_definitions CastToInt32VectorFunction::getDefinitions() {
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<uint32_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<uint16_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<uint8_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<float_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<double_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT32));
// down cast end
result.push_back(bindVectorFunction<uint16_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<uint8_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT32));
result.push_back(bindVectorFunction<ku_string_t, int32_t, CastToInt32>(
CAST_TO_INT32_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT32));
return result;
}

Expand All @@ -366,12 +376,15 @@ vector_function_definitions CastToInt16VectorFunction::getDefinitions() {
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<uint16_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<uint8_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<float_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<double_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT16));
// down cast end
result.push_back(bindVectorFunction<uint8_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT16));
result.push_back(bindVectorFunction<ku_string_t, int16_t, CastToInt16>(
CAST_TO_INT16_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT16));
return result;
}

Expand All @@ -386,8 +399,20 @@ vector_function_definitions CastToInt8VectorFunction::getDefinitions() {
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::INT64, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<float_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<uint64_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::UINT64, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<uint32_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<uint16_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<uint8_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::UINT8, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<double_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<float_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::INT8));
result.push_back(bindVectorFunction<ku_string_t, int8_t, CastToInt8>(
CAST_TO_INT8_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::INT8));
return result;
}

Expand All @@ -399,7 +424,6 @@ vector_function_definitions CastToUInt64VectorFunction::getDefinitions() {
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::UINT16, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<uint32_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::UINT32, LogicalTypeID::UINT64));
// down cast
result.push_back(bindVectorFunction<int8_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT8, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<int16_t, uint64_t, CastToUInt64>(
Expand All @@ -408,10 +432,13 @@ vector_function_definitions CastToUInt64VectorFunction::getDefinitions() {
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT32, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<int64_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::INT64, LogicalTypeID::UINT64));
// down cast
result.push_back(bindVectorFunction<float_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<double_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT64));
result.push_back(bindVectorFunction<ku_string_t, uint64_t, CastToUInt64>(
CAST_TO_UINT64_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::UINT64));
return result;
}

Expand All @@ -436,6 +463,8 @@ vector_function_definitions CastToUInt32VectorFunction::getDefinitions() {
CAST_TO_UINT32_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT32));
result.push_back(bindVectorFunction<double_t, uint32_t, CastToUInt32>(
CAST_TO_UINT32_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT32));
result.push_back(bindVectorFunction<ku_string_t, uint32_t, CastToUInt32>(
CAST_TO_UINT32_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::UINT32));
return result;
}

Expand All @@ -460,6 +489,8 @@ vector_function_definitions CastToUInt16VectorFunction::getDefinitions() {
CAST_TO_UINT16_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT16));
result.push_back(bindVectorFunction<double_t, uint16_t, CastToUInt16>(
CAST_TO_UINT16_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT16));
result.push_back(bindVectorFunction<ku_string_t, uint16_t, CastToUInt16>(
CAST_TO_UINT16_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::UINT16));
return result;
}

Expand All @@ -484,6 +515,8 @@ vector_function_definitions CastToUInt8VectorFunction::getDefinitions() {
CAST_TO_UINT8_FUNC_NAME, LogicalTypeID::FLOAT, LogicalTypeID::UINT8));
result.push_back(bindVectorFunction<double_t, uint8_t, CastToUInt8>(
CAST_TO_UINT8_FUNC_NAME, LogicalTypeID::DOUBLE, LogicalTypeID::UINT8));
result.push_back(bindVectorFunction<ku_string_t, uint8_t, CastToUInt8>(
CAST_TO_UINT8_FUNC_NAME, LogicalTypeID::STRING, LogicalTypeID::UINT8));
return result;
}

Expand Down
2 changes: 2 additions & 0 deletions src/include/common/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ class StringUtils {
str = std::regex_replace(str, whiteSpacePattern, "");
}

// Skips leading/trailing whitespace of the character range [input, input + len) without
// copying: `input` is advanced and `len` is reduced; the characters themselves are untouched.
static void removeCStringWhiteSpaces(const char*& input, uint64_t& len);

static void replaceAll(
std::string& str, const std::string& search, const std::string& replacement);

Expand Down
33 changes: 0 additions & 33 deletions src/include/common/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,39 +19,6 @@ class StringCastUtils {
public:
static bool tryCastToBoolean(const char* data, uint64_t length, bool& result);
static bool castToBool(const char* data, uint64_t length);
/**
 * Attempts to parse the character range [data, data + length) as a number of type T.
 *
 * The text is first passed through removeSpace() and then extracted with a string stream.
 * The whole remaining text must be consumed for the parse to count as successful.
 *
 * @param data   pointer to the first character to parse.
 * @param length number of characters to parse.
 * @param result receives the parsed value; its content is not meaningful when false is returned.
 * @return true if the entire text was parsed as a T that fits in T's range, false otherwise.
 * @throws ConversionException if the text is empty after removeSpace().
 */
template<typename T>
static bool tryCastToNum(const char* data, uint64_t length, T& result) {
    auto numStr = std::string{data, length};
    removeSpace(numStr);
    if (numStr.empty()) {
        throw ConversionException{"Empty string."};
    }
    std::istringstream iss{numStr};

    if constexpr (std::is_same_v<int8_t, T> || std::is_same_v<uint8_t, T>) {
        // operator>> treats (u)int8_t as a character type, so parse through a wider int.
        int val;
        iss >> val;
        result = static_cast<T>(val);
        // Reject values that do not survive the narrowing (e.g. "300" for int8_t) instead of
        // silently reporting a wrapped result as a successful parse.
        if (static_cast<int>(result) != val) {
            return false;
        }
    } else {
        iss >> result;
    }

    // Success requires a clean extraction that consumed every character.
    return !iss.fail() && iss.eof();
}
/**
 * Parses the character range [data, data + length) as a number of type T.
 *
 * Thin throwing wrapper around tryCastToNum().
 *
 * @return the parsed value.
 * @throws ConversionException if tryCastToNum() reports failure (or throws itself).
 */
template<typename T>
static T castToNum(const char* data, uint64_t length) {
    T parsed;
    const bool ok = tryCastToNum(data, length, parsed);
    if (ok) {
        return parsed;
    }
    throw ConversionException{"Invalid number: " + std::string{data, length} + "."};
}

private:
static void removeSpace(std::string& str);
Expand Down
Loading

0 comments on commit af4bd0b

Please sign in to comment.