From 5b19d38cedc9d613ab16935a84ac761641920dfb Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Thu, 25 Jul 2024 19:37:16 +0800 Subject: [PATCH] 1 --- be/src/vec/functions/function_cast.h | 81 ++++++++++---------------- be/src/vec/runtime/vdatetime_value.cpp | 29 +++++---- 2 files changed, 51 insertions(+), 59 deletions(-) diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 563eac408d24e3..0adada0e4539bf 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -967,9 +967,9 @@ struct NameToDateTime { static constexpr auto name = "toDateTime"; }; -template +template bool try_parse_impl(typename DataType::FieldType& x, ReadBuffer& rb, FunctionContext* context, - Additions additions [[maybe_unused]] = Additions()) { + UInt32 scale [[maybe_unused]] = 0) { if constexpr (IsDateTimeType) { return try_read_datetime_text(x, rb, context->state()->timezone_obj()); } @@ -983,7 +983,6 @@ bool try_parse_impl(typename DataType::FieldType& x, ReadBuffer& rb, FunctionCon } if constexpr (IsDateTimeV2Type) { - UInt32 scale = additions; return try_read_datetime_v2_text(x, rb, context->state()->timezone_obj(), scale); } @@ -1021,7 +1020,6 @@ bool try_parse_impl(typename DataType::FieldType& x, ReadBuffer& rb, FunctionCon template StringParser::ParseResult try_parse_decimal_impl(typename DataType::FieldType& x, ReadBuffer& rb, - const cctz::time_zone& local_time_zone, Additions additions [[maybe_unused]] = Additions()) { if constexpr (IsDataTypeDecimalV2) { @@ -1450,15 +1448,9 @@ class PreparedFunctionCast : public PreparedFunctionImpl { const char* name; }; -struct NameCast { - static constexpr auto name = "CAST"; -}; - -template -struct ConvertThroughParsing { - static_assert(std::is_same_v, - "ConvertThroughParsing is only applicable for String or FixedString data types"); - +// always from DataTypeString +template +struct StringParsing { using ToFieldType = typename ToDataType::FieldType; static bool is_all_read(ReadBuffer& in) { return in.eof(); } @@ -1471,48 +1463,38 @@ struct ConvertThroughParsing { ColumnDecimal, ColumnVector>; const IColumn* col_from = block.get_by_position(arguments[0]).column.get(); - const ColumnString* col_from_string = check_and_get_column(col_from); + const auto* col_from_string = check_and_get_column(col_from); - if (std::is_same_v && !col_from_string) { + if (!col_from_string) { return Status::RuntimeError("Illegal column {} of first argument of function {}", col_from->get_name(), Name::name); } - size_t size = input_rows_count; + size_t row = input_rows_count; typename ColVecTo::MutablePtr col_to = nullptr; if constexpr (IsDataTypeDecimal) { UInt32 scale = ((PrecisionScaleArg)additions).scale; ToDataType::check_type_scale(scale); - col_to = ColVecTo::create(size, scale); + col_to = ColVecTo::create(row, scale); } else { - col_to = ColVecTo::create(size); + col_to = ColVecTo::create(row); } typename ColVecTo::Container& vec_to = col_to->get_data(); ColumnUInt8::MutablePtr col_null_map_to; ColumnUInt8::Container* vec_null_map_to [[maybe_unused]] = nullptr; - col_null_map_to = ColumnUInt8::create(size); + col_null_map_to = ColumnUInt8::create(row); vec_null_map_to = &col_null_map_to->get_data(); - const ColumnString::Chars* chars = nullptr; - const IColumn::Offsets* offsets = nullptr; - size_t fixed_string_size = 0; - - if constexpr (std::is_same_v) { - chars = &col_from_string->get_chars(); - offsets = &col_from_string->get_offsets(); - } + const ColumnString::Chars* chars = &col_from_string->get_chars(); + const IColumn::Offsets* offsets = &col_from_string->get_offsets(); size_t current_offset = 0; - for (size_t i = 0; i < size; ++i) { - size_t next_offset = std::is_same_v - ? (*offsets)[i] - : (current_offset + fixed_string_size); - size_t string_size = std::is_same_v - ? next_offset - current_offset - : fixed_string_size; + for (size_t i = 0; i < row; ++i) { + size_t next_offset = (*offsets)[i]; + size_t string_size = next_offset - current_offset; ReadBuffer read_buffer(&(*chars)[current_offset], string_size); @@ -1520,8 +1502,7 @@ struct ConvertThroughParsing { if constexpr (IsDataTypeDecimal) { ToDataType::check_type_precision((PrecisionScaleArg(additions).precision)); StringParser::ParseResult res = try_parse_decimal_impl( - vec_to[i], read_buffer, context->state()->timezone_obj(), - PrecisionScaleArg(additions)); + vec_to[i], read_buffer, PrecisionScaleArg(additions)); parsed = (res == StringParser::PARSE_SUCCESS || res == StringParser::PARSE_OVERFLOW || res == StringParser::PARSE_UNDERFLOW); @@ -1531,8 +1512,8 @@ struct ConvertThroughParsing { parsed = try_parse_impl(vec_to[i], read_buffer, context, type->get_scale()); } else { - parsed = try_parse_impl(vec_to[i], read_buffer, - context); + parsed = + try_parse_impl(vec_to[i], read_buffer, context); } (*vec_null_map_to)[i] = !parsed || !is_all_read(read_buffer); current_offset = next_offset; @@ -1546,25 +1527,27 @@ struct ConvertThroughParsing { template struct ConvertImpl, Name> - : ConvertThroughParsing, Name> {}; + : StringParsing, Name> {}; template struct ConvertImpl, Name> - : ConvertThroughParsing, Name> {}; + : StringParsing, Name> {}; template struct ConvertImpl, Name> - : ConvertThroughParsing, Name> {}; + : StringParsing, Name> {}; template struct ConvertImpl, Name> - : ConvertThroughParsing, Name> {}; + : StringParsing, Name> {}; template struct ConvertImpl, Name> - : ConvertThroughParsing, Name> {}; + : StringParsing, Name> {}; template -struct ConvertImpl - : ConvertThroughParsing {}; +struct ConvertImpl : StringParsing {}; template -struct ConvertImpl - : ConvertThroughParsing {}; +struct ConvertImpl : StringParsing {}; + +struct NameCast { + static constexpr auto name = "CAST"; +}; template class FunctionConvertFromString : public IFunction { @@ -1599,8 +1582,8 @@ class FunctionConvertFromString : public IFunction { const IDataType* from_type = block.get_by_position(arguments[0]).type.get(); if (check_and_get_data_type(from_type)) { - return ConvertThroughParsing::execute( - context, block, arguments, result, input_rows_count); + return StringParsing::execute(context, block, arguments, result, + input_rows_count); } return Status::RuntimeError( diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 610983a149d669..877573bcccb846 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -55,6 +55,15 @@ uint8_t mysql_week_mode(uint32_t mode) { return mode; } +static bool check_space(char ch) { + // \t, \n, \v, \f, \r are 9~13, respectively. + return UNLIKELY(ch == ' ' || (ch >= 9 && ch <= 13)); +} + +static bool check_date_punct(char ch) { + return UNLIKELY(!(isdigit(ch) || isalpha(ch))); +} + static bool time_zone_begins(const char* ptr, const char* end) { return *ptr == '+' || (*ptr == '-' && ptr + 3 < end && *(ptr + 3) == ':') || (isalpha(*ptr) && *ptr != 'T'); @@ -104,7 +113,7 @@ bool VecDateTimeValue::from_date_str_base(const char* date_str, int len, _neg = false; // Skip space character - while (ptr < end && isspace(*ptr)) { + while (ptr < end && check_space(*ptr)) { ptr++; } if (ptr == end || !isdigit(*ptr)) { @@ -202,8 +211,8 @@ bool VecDateTimeValue::from_date_str_base(const char* date_str, int len, continue; } // escape separator - while (ptr < end && (ispunct(*ptr) || isspace(*ptr))) { - if (isspace(*ptr)) { + while (ptr < end && (check_date_punct(*ptr) || check_space(*ptr))) { + if (check_space(*ptr)) { if (((1 << field_idx) & allow_space_mask) == 0) { return false; } @@ -1235,7 +1244,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0}; while (ptr < end && val < val_end) { // Skip space character - while (val < val_end && isspace(*val)) { + while (val < val_end && check_space(*val)) { val++; } if (val >= val_end) { @@ -1500,7 +1509,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, default: return false; } - } else if (!isspace(*ptr)) { + } else if (!check_space(*ptr)) { if (*ptr != *val) { return false; } @@ -1987,13 +1996,13 @@ bool DateV2Value::from_date_str(const char* date_str, int len, int scale /* = bool convert_zero) { return from_date_str_base(date_str, len, scale, nullptr, convert_zero); } -// when we parse template bool DateV2Value::from_date_str(const char* date_str, int len, const cctz::time_zone& local_time_zone, int scale /* = -1*/, bool convert_zero) { return from_date_str_base(date_str, len, scale, &local_time_zone, convert_zero); } +// if local_time_zone is null, only be able to parse time without timezone template bool DateV2Value::from_date_str_base(const char* date_str, int len, int scale, const cctz::time_zone* local_time_zone, bool convert_zero) { @@ -2005,7 +2014,7 @@ bool DateV2Value::from_date_str_base(const char* date_str, int len, int scale int32_t date_len[MAX_DATE_PARTS] = {0}; // Skip space character - while (ptr < end && isspace(*ptr)) { + while (ptr < end && check_space(*ptr)) { ptr++; } if (ptr == end || !isdigit(*ptr)) { @@ -2153,8 +2162,8 @@ bool DateV2Value::from_date_str_base(const char* date_str, int len, int scale continue; } // escape separator - while (ptr < end && (ispunct(*ptr) || isspace(*ptr))) { - if (isspace(*ptr)) { + while (ptr < end && (check_date_punct(*ptr) || check_space(*ptr))) { + if (check_space(*ptr)) { if (((1 << field_idx) & allow_space_mask) == 0) { return false; } @@ -2286,7 +2295,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co auto [year, month, day, hour, minute, second, microsecond] = std::tuple {0, 0, 0, 0, 0, 0, 0}; while (ptr < end && val < val_end) { // Skip space character - while (val < val_end && isspace(*val)) { + while (val < val_end && check_space(*val)) { val++; } if (val >= val_end) {