Skip to content

Commit

Permalink
test
Browse files Browse the repository at this point in the history
  • Loading branch information
AEsir777 committed Sep 28, 2023
1 parent 348a435 commit 632d473
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 98 deletions.
2 changes: 1 addition & 1 deletion src/common/string_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ std::vector<std::string> StringUtils::splitBySpace(const std::string& input) {
return result;
}

char* StringUtils::removeCStringWhiteSpaces(char *input, uint64_t& len) {
const char* StringUtils::removeCStringWhiteSpaces(const char *input, uint64_t& len) {
// skip leading/trailing spaces
uint64_t start = 0;
uint64_t end = len - 1;
Expand Down
2 changes: 1 addition & 1 deletion src/include/common/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class StringUtils {
str = std::regex_replace(str, whiteSpacePattern, "");
}

// Declaration of the C-string whitespace trimmer. `len` is taken by non-const
// reference; presumably it is updated to the trimmed length and the returned
// pointer addresses the first non-space character — confirm against the
// definition in string_utils.cpp.
// NOTE(review): the two declarations below appear to be the removed and the
// added side of this diff (non-const vs. const pointer), not two overloads.
static char* removeCStringWhiteSpaces(char *input, uint64_t&len);
static const char* removeCStringWhiteSpaces(const char *input, uint64_t&len);

// Declaration only — the body is not part of this view. Presumably replaces
// every occurrence of `search` inside `str` with `replacement`, modifying
// `str` in place (it is taken by non-const reference) — TODO confirm.
static void replaceAll(
std::string& str, const std::string& search, const std::string& replacement);
Expand Down
2 changes: 1 addition & 1 deletion src/include/common/types/ku_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ struct ku_string_t {
static constexpr uint64_t INLINED_SUFFIX_LENGTH = 8;
static constexpr uint64_t SHORT_STR_LENGTH = PREFIX_LENGTH + INLINED_SUFFIX_LENGTH;

uint32_t len;
uint64_t len;
uint8_t prefix[PREFIX_LENGTH];
union {
uint8_t data[INLINED_SUFFIX_LENGTH];
Expand Down
133 changes: 70 additions & 63 deletions src/include/function/cast/cast_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,20 +98,22 @@ static inline void numericDownCast(SRC& input, DST& result, const std::string& d

// Parse state used while casting a string to an integer of type T.
//   Result       - alias for T, so generic code can name the result type
//                  (e.g. via `typename State::Result`).
//   result       - the value accumulated so far.
//   seen_decimal - flag; presumably set once a decimal separator has been
//                  consumed — confirm against the cast loop.
// NOTE(review): `T result;` and `Result result;` appear to be the removed and
// the added side of this diff; only one `result` member can exist.
template <typename T>
struct IntegerCastData {
T result;
using Result = T;
Result result;
bool seen_decimal;
};

struct IntegerCastOperation {
template <class T, bool NEGATIVE>
static bool HandleDigit(T &state, uint8_t digit) {
using result_t = typename T::Result;
if (NEGATIVE) {
if (state.result < (std::numeric_limits<T>::Minimum() + digit) / 10) {
if (state.result < ((std::numeric_limits<result_t>::min() + digit) / 10)) {
return false;
}
state.result = state.result * 10 - digit;
} else {
if (state.result > (std::numeric_limits<T>::Maximum() - digit) / 10) {
if (state.result > ((std::numeric_limits<result_t>::max() - digit) / 10)) {
return false;
}
state.result = state.result * 10 + digit;
Expand Down Expand Up @@ -196,6 +198,7 @@ struct DecimalCastOperation {
if (state.exponent_type == T::ExponentType::None && state.round_set && state.should_round) {

}
return true;
}
};

Expand Down Expand Up @@ -272,7 +275,7 @@ static bool IntegerCastLoop(const char* input, uint64_t len, T& result) {
}

template<typename T, bool IS_SIGNED = true>
static bool tryIntegerCast(char* input, uint64_t& len, T& result) {
static bool tryIntegerCast(const char* input, const uint64_t& len, T& result) {
auto str = common::StringUtils::removeCStringWhiteSpaces(input, len);
if (len == 0) {
return false;
Expand Down Expand Up @@ -304,51 +307,32 @@ static bool tryIntegerCast(char* input, uint64_t& len, T& result) {
// Convenience wrapper around the integer cast: uses a local IntegerCastData<T>
// as the parse state and, on success, copies data.result into `result`.
// Returns true when the cast succeeded; on failure returns false and leaves
// `result` untouched.
// NOTE(review): the two consecutive `if` lines appear to be the removed
// (TryIntegerCast) and the added (tryIntegerCast) side of this diff.
// NOTE(review): `data` is declared without an initializer, so its members are
// not zeroed here; presumably the callee initialises them — confirm.
template <typename T, bool IS_SIGNED = true>
static inline bool TrySimpleIntegerCast(const char *input, uint64_t len, T& result) {
IntegerCastData<T> data;
if (TryIntegerCast<IntegerCastData<T>, IS_SIGNED>(input, len, data)) {
if (tryIntegerCast<IntegerCastData<T>, IS_SIGNED>(input, len, data)) {
result = data.result;
return true;
}
return false;
}

// Non-throwing string-to-number cast; returns false when the text cannot be
// converted.
// NOTE(review): this hunk appears to show the removed implementation (first
// signature and its whole body) followed by the added one (second signature,
// a one-line forward to TrySimpleIntegerCast).
//
// Removed version: trims whitespace, rejects empty input, rejects a '-' on
// unsigned targets unless every following character is '0', rejects a leading
// '0' on multi-character input, then runs IntegerCastLoop.
template<typename T, bool IS_SIGNED = true>
static bool tryCastStringToNum(char* input, uint64_t& len, T& result) {
auto str = common::StringUtils::removeCStringWhiteSpaces(input, len);
if (len == 0) {
return false;
}

// negative
if (*str == '-') {
if (!IS_SIGNED) { // unsigned if not -0
uint64_t pos = 1;
while (pos < len) {
if (str[pos++] != '0') {
return false;
}
}
}
// decimal separator is default to "."
return IntegerCastLoop<T, true>(str, len, result) ;
}

// not allow leading 0
if (len > 1 && *str == '0') {
return false;
}

return IntegerCastLoop<T, false>(str, len, result);
// Added version: forwards to TrySimpleIntegerCast.
// NOTE(review): no IS_SIGNED argument is passed, so TrySimpleIntegerCast is
// instantiated with its default IS_SIGNED = true even for unsigned T —
// confirm this is intended.
template<typename T>
static inline bool tryCastStringToNum(const char* input, uint64_t& len, T& result) {
return TrySimpleIntegerCast(input, len, result);
}

// Throwing string-to-number cast: raises common::ConversionException when
// tryCastStringToNum reports failure.
// NOTE(review): this hunk appears to interleave the removed version (returns
// T, takes the type name as a std::string) with the added version (returns
// void, writes through `result`, takes a common::LogicalType). The trailing
// `return result;` belongs to the removed version.
// NOTE(review): the error message builds std::string{input}, which reads up to
// the first NUL and ignores `len` — assumes `input` is NUL-terminated; confirm
// for callers that pass non-terminated buffers.
template<typename T>
static inline T castStringToNum(
const char* input, const uint64_t& len, const std::string& type = "") {
T result;
if (!tryCastStringToNum(input, len, result)) {
throw common::ConversionException{
"Cast failed. " + std::string{input} + " is not in " + type + " range."};
static inline void castStringToNum(
const char* input, uint64_t& len, T& result, const common::LogicalType type) {

// NOTE(review): only INT64 has a case; every other LogicalTypeID reaches
// `default` and returns without assigning `result`.
switch (type.getLogicalTypeID()) {
case common::LogicalTypeID::INT64:
if (!tryCastStringToNum(input, len, result)) {
throw common::ConversionException{"Cast failed. " + std::string{input} + " is not in " +
common::LogicalTypeUtils::dataTypeToString(type) +
" range."};
}
// NOTE(review): no `break` here — the INT64 case falls through into `default`.
// Harmless today because `default` only returns, but fragile once more cases
// are added.
default:
return;
}
return result;
}

struct CastToDouble {
Expand All @@ -360,12 +344,15 @@ struct CastToDouble {

// String -> DOUBLE casts. The first specialization takes a C string and
// measures it with strlen; the second takes a ku_string_t and uses its
// getData()/len. Both delegate to castStringToNum<double_t>.
// NOTE(review): in each body the first statement (`result = ...`) appears to
// be the removed line of this diff and the following call its replacement.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with DOUBLE it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToDouble::operation(char*& input, double_t& result) {
result = castStringToNum<double_t>(input, strlen(input), "DOUBLE");
uint64_t len = strlen(input);
castStringToNum<double_t>(
input, len, result, common::LogicalType{common::LogicalTypeID::DOUBLE});
}

template<>
inline void CastToDouble::operation(common::ku_string_t& input, double_t& result) {
result = castStringToNum<double_t>((char*)input.getData(), input.len, "DOUBLE");
castStringToNum<double_t>((char*)input.getData(), input.len, result,
common::LogicalType{common::LogicalTypeID::DOUBLE});
}

struct CastToFloat {
Expand All @@ -377,12 +364,15 @@ struct CastToFloat {

// String -> FLOAT casts. The first specialization takes a C string and
// measures it with strlen; the second takes a ku_string_t and uses its
// getData()/len. Both delegate to castStringToNum<float_t>.
// NOTE(review): in each body the first statement (`result = ...`) appears to
// be the removed line of this diff and the following call its replacement.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with FLOAT it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToFloat::operation(char*& input, float_t& result) {
result = castStringToNum<float_t>(input, strlen(input), "FLOAT");
uint64_t len = strlen(input);
castStringToNum<float_t>(
input, len, result, common::LogicalType{common::LogicalTypeID::FLOAT});
}

template<>
inline void CastToFloat::operation(common::ku_string_t& input, float_t& result) {
result = castStringToNum<float_t>((char*)input.getData(), input.len, "FLOAT");
castStringToNum<float_t>((char*)input.getData(), input.len, result,
common::LogicalType{common::LogicalTypeID::FLOAT});
}

template<>
Expand Down Expand Up @@ -413,13 +403,16 @@ inline void CastToInt64::operation(float_t& input, int64_t& result) {
}

// String -> INT64 casts, one specialization for a C string (length taken with
// strlen) and one for ku_string_t (getData()/len). Both delegate to
// castStringToNum<int64_t> with an INT64 LogicalType.
// NOTE(review): the commit appears to swap the order of the two
// specializations, so each body below shows a removed signature + removed
// statement immediately followed by the added signature + added statements.
template<>
inline void CastToInt64::operation(common::ku_string_t& input, int64_t& result) {
result = castStringToNum<int64_t>((char*)input.getData(), input.len, "INT64");
inline void CastToInt64::operation(char*& input, int64_t& result) {
uint64_t len = strlen(input);
castStringToNum<int64_t>(
input, len, result, common::LogicalType{common::LogicalTypeID::INT64});
}

template<>
inline void CastToInt64::operation(char*& input, int64_t& result) {
result = castStringToNum<int64_t>(input, strlen(input), "INT64");
inline void CastToInt64::operation(common::ku_string_t& input, int64_t& result) {
castStringToNum<int64_t>((char*)input.getData(), input.len, result,
common::LogicalType{common::LogicalTypeID::INT64});
}

struct CastToSerial {
Expand Down Expand Up @@ -462,13 +455,16 @@ inline void CastToInt32::operation(uint32_t& input, int32_t& result) {
}

// String -> INT32 casts, one specialization for a C string (length taken with
// strlen) and one for ku_string_t (getData()/len). Both delegate to
// castStringToNum<int32_t> with an INT32 LogicalType.
// NOTE(review): the commit appears to swap the order of the two
// specializations, so each body below shows a removed signature + removed
// statement immediately followed by the added signature + added statements.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with INT32 it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToInt32::operation(common::ku_string_t& input, int32_t& result) {
result = castStringToNum<int32_t>((char*)input.getData(), input.len, "INT32");
inline void CastToInt32::operation(char*& input, int32_t& result) {
uint64_t len = strlen(input);
castStringToNum<int32_t>(
input, len, result, common::LogicalType{common::LogicalTypeID::INT32});
}

template<>
inline void CastToInt32::operation(char*& input, int32_t& result) {
result = castStringToNum<int32_t>(input, strlen(input), "INT32");
inline void CastToInt32::operation(common::ku_string_t& input, int32_t& result) {
castStringToNum<int32_t>(
(char* )input.getData(), input.len, result, common::LogicalType{common::LogicalTypeID::INT32});
}

struct CastToInt16 {
Expand All @@ -490,12 +486,15 @@ inline void CastToInt16::operation(uint8_t& input, int16_t& result) {

// String -> INT16 casts. The first specialization takes a ku_string_t and uses
// its getData()/len; the second takes a C string and measures it with strlen.
// Both delegate to castStringToNum<int16_t>.
// NOTE(review): in each body the first statement (`result = ...`) appears to
// be the removed line of this diff and the following call its replacement.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with INT16 it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToInt16::operation(common::ku_string_t& input, int16_t& result) {
result = castStringToNum<int16_t>((char*)input.getData(), input.len, "INT16");
castStringToNum<int16_t>((char*)input.getData(), input.len, result,
common::LogicalType{common::LogicalTypeID::INT16});
}

template<>
inline void CastToInt16::operation(char*& input, int16_t& result) {
result = castStringToNum<int16_t>(input, strlen(input), "INT16");
uint64_t len = strlen(input);
castStringToNum<int16_t>(
input, len, result, common::LogicalType{common::LogicalTypeID::INT16});
}

struct CastToInt8 {
Expand All @@ -507,12 +506,14 @@ struct CastToInt8 {

// String -> INT8 casts. The first specialization takes a ku_string_t and uses
// its getData()/len; the second takes a C string and measures it with strlen.
// Both delegate to castStringToNum<int8_t>.
// NOTE(review): in each body the first statement (`result = ...`) appears to
// be the removed line of this diff and the following call its replacement.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with INT8 it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToInt8::operation(common::ku_string_t& input, int8_t& result) {
result = castStringToNum<int8_t>((char*)input.getData(), input.len, "INT8");
castStringToNum<int8_t>((char*)input.getData(), input.len, result,
common::LogicalType{common::LogicalTypeID::INT8});
}

template<>
inline void CastToInt8::operation(char*& input, int8_t& result) {
result = castStringToNum<int8_t>(input, strlen(input), "INT8");
uint64_t len = strlen(input);
castStringToNum<int8_t>(input, len, result, common::LogicalType{common::LogicalTypeID::INT8});
}

struct CastToUInt64 {
Expand All @@ -534,12 +535,14 @@ inline void CastToUInt64::operation(float_t& input, uint64_t& result) {

// String -> UINT64 casts. The first specialization takes a ku_string_t and
// uses its getData()/len; the second takes a C string and measures it with
// strlen. Both delegate to castStringToNum<uint64_t>.
// NOTE(review): in each body the first statement (`result = ...`) appears to
// be the removed line of this diff and the following call its replacement.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with UINT64 it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToUInt64::operation(common::ku_string_t& input, uint64_t& result) {
result = castStringToNum<uint64_t>((char*)input.getData(), input.len, "UINT64");
castStringToNum<uint64_t>((char*)input.getData(), input.len, result,
common::LogicalType{common::LogicalTypeID::UINT64});
}

template<>
inline void CastToUInt64::operation(char*& input, uint64_t& result) {
result = castStringToNum<uint64_t>(input, strlen(input), "UINT64");
uint64_t len = strlen(input);
castStringToNum<uint64_t>(input, len, result, common::LogicalType{common::LogicalTypeID::UINT64});
}

struct CastToUInt32 {
Expand Down Expand Up @@ -571,12 +574,14 @@ inline void CastToUInt32::operation(uint64_t& input, uint32_t& result) {

// String -> UINT32 casts. The first specialization takes a ku_string_t and
// uses its getData()/len; the second takes a C string and measures it with
// strlen. Both delegate to castStringToNum<uint32_t>.
// NOTE(review): in each body the first statement (`result = ...`) appears to
// be the removed line of this diff and the following call its replacement.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with UINT32 it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToUInt32::operation(common::ku_string_t& input, uint32_t& result) {
result = castStringToNum<uint32_t>((char*)input.getData(), input.len, "UINT32");
castStringToNum<uint32_t>((char*)input.getData(), input.len, result,
common::LogicalType{common::LogicalTypeID::UINT32});
}

template<>
inline void CastToUInt32::operation(char*& input, uint32_t& result) {
result = castStringToNum<uint32_t>(input, strlen(input), "UINT32");
uint64_t len = strlen(input);
castStringToNum<uint32_t>(input, len, result, common::LogicalType{common::LogicalTypeID::UINT32});
}

struct CastToUInt16 {
Expand Down Expand Up @@ -618,12 +623,13 @@ inline void CastToUInt16::operation(uint32_t& input, uint16_t& result) {

// String -> UINT16 casts. The first specialization takes a ku_string_t and
// uses its getData()/len; the second takes a C string and measures it with
// strlen. Both delegate to castStringToNum<uint16_t>.
// NOTE(review): in each body the first statement (`result = ...`) appears to
// be the removed line of this diff and the following call its replacement.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with UINT16 it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToUInt16::operation(common::ku_string_t& input, uint16_t& result) {
result = castStringToNum<uint16_t>((char*)input.getData(), input.len, "UINT16");
castStringToNum<uint16_t>((char*)input.getData(), input.len, result, common::LogicalType{common::LogicalTypeID::UINT16});
}

template<>
inline void CastToUInt16::operation(char*& input, uint16_t& result) {
result = castStringToNum<uint16_t>(input, strlen(input), "UINT16");
uint64_t len = strlen(input);
castStringToNum<uint16_t>(input, len, result, common::LogicalType{common::LogicalTypeID::UINT16});
}

struct CastToUInt8 {
Expand Down Expand Up @@ -675,12 +681,13 @@ inline void CastToUInt8::operation(uint16_t& input, uint8_t& result) {

// String -> UINT8 casts. The first specialization takes a ku_string_t and uses
// its getData()/len; the second takes a C string and measures it with strlen.
// Both delegate to castStringToNum<uint8_t>.
// NOTE(review): in each body the first statement (`result = ...`) appears to
// be the removed line of this diff and the following call its replacement.
// NOTE(review): the castStringToNum shown in this diff only has a `case` for
// LogicalTypeID::INT64; with UINT8 it reaches `default: return;` and would
// leave `result` unassigned — confirm.
template<>
inline void CastToUInt8::operation(common::ku_string_t& input, uint8_t& result) {
result = castStringToNum<uint8_t>((char*)input.getData(), input.len, "UINT8");
castStringToNum<uint8_t>((char*)input.getData(), input.len, result, common::LogicalType{common::LogicalTypeID::UINT8});
}

template<>
inline void CastToUInt8::operation(char*& input, uint8_t& result) {
result = castStringToNum<uint8_t>(input, strlen(input), "UINT8");
uint64_t len = strlen(input);
castStringToNum<uint8_t>(input, len, result, common::LogicalType{common::LogicalTypeID::UINT8});
}

} // namespace function
Expand Down
3 changes: 2 additions & 1 deletion src/include/storage/store/column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ class ColumnChunk {

// Parses `length` characters starting at `value` as a T and stores the parsed
// value at position `pos` via setValue<T>.
// NOTE(review): `auto val = ...` appears to be the removed line of this diff;
// `T val;` plus the out-parameter call is its replacement.
// NOTE(review): `val` is declared without an initializer and the call passes
// LogicalTypeID::ANY. The castStringToNum shown in this diff only assigns its
// result for LogicalTypeID::INT64, so `val` could reach setValue with an
// indeterminate value — confirm.
template<typename T>
void setValueFromString(const char* value, uint64_t length, common::offset_t pos) {
auto val = function::castStringToNum<T>(value, length);
T val;
function::castStringToNum<T>(value, length, val, common::LogicalType{common::LogicalTypeID::ANY});
setValue<T>(val, pos);
}

Expand Down
5 changes: 4 additions & 1 deletion src/processor/operator/persistent/reader/csv/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,13 @@ void BaseCSVReader::copyStringToVector(ValueVector* vector, std::string& strVal)
} else {
vector->setNull(rowToAdd, false /* isNull */);
}
uint64_t
switch (type.getLogicalTypeID()) {
case LogicalTypeID::INT64: {
int64_t val;
function::castStringToNum<int64_t>(strVal.c_str(), strVal.length(), val, type)
vector->setValue(
rowToAdd, function::castStringToNum<int64_t>(strVal.c_str(), strVal.length()));
rowToAdd, val);
} break;
case LogicalTypeID::INT32: {
vector->setValue(
Expand Down
Loading

0 comments on commit 632d473

Please sign in to comment.