From f3a185478f587270c3528602e142da586338e620 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli
Date: Fri, 6 Jan 2023 18:18:22 -0500
Subject: [PATCH] perf: improve ends_in_a_number by 25%

---
Notes for this revision (everything between the "---" line above and the
first "diff --git" line is commentary and is not applied):

 * ends_in_a_number: the last part is now taken with rfind('.') on a
   std::string_view. This drops the std::count pass, the function-local
   static std::string delimiter and the std::string temporary that was built
   only to be converted straight back to a view.
 * Digit checks use find_first_not_of("0123456789") instead of ::isdigit,
   which has undefined behaviour when handed a negative char value.
 * is_ipv4_number_valid is declared as a plain function. It is defined in
   src/checkers.cpp, so marking it constexpr (and therefore inline) in the
   header left other translation units without a definition.
 * is_ipv4_number_valid returns false for empty input instead of reading
   input[0].
 * The TODO above ends_in_a_number is removed because this patch does what
   it asked for.
 * The 25% figure in the subject predates this revision and has not been
   re-measured. Blob ids on the index lines are likewise carried over.

 include/ada/checkers.h |  1 +
 src/checkers.cpp       | 53 ++++++++++++++++++++++++++++++------------
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/include/ada/checkers.h b/include/ada/checkers.h
index 81fa14566..1636e5e16 100644
--- a/include/ada/checkers.h
+++ b/include/ada/checkers.h
@@ -10,6 +10,7 @@ namespace ada::checkers {
   bool ends_in_a_number(std::string_view input) noexcept;
   bool is_windows_drive_letter(std::string_view input) noexcept;
   bool is_normalized_windows_drive_letter(std::string_view input) noexcept;
+  bool is_ipv4_number_valid(std::string_view input) noexcept;

 } // namespace ada::checkers

diff --git a/src/checkers.cpp b/src/checkers.cpp
index c696d783c..646474f32 100644
--- a/src/checkers.cpp
+++ b/src/checkers.cpp
@@ -7,34 +7,32 @@ namespace ada::checkers {

-  // TODO: Refactor this to not use `std::vector` but use pointer arithmetic for performance.
   bool ends_in_a_number(const std::string_view input) noexcept {
-    // Let parts be the result of strictly splitting input on U+002E (.).
-    std::vector<std::string_view> parts = ada::helpers::split_string_view(input, '.', false);
-
-    if (parts.empty()) {
+    if (input.empty()) {
       return false;
     }

+    // Work on a copy so a trailing dot can be dropped without touching input.
+    std::string_view remaining = input;
+
     // If the last item in parts is the empty string, then:
-    if (parts.back().empty()) {
-      // If parts’s size is 1, then return false.
-      if (parts.size() == 1) {
-        return false;
-      }
-
+    if (remaining.back() == '.') {
+      // The size of parts cannot be 1 here: a single empty part means input
+      // was empty, and that case has already returned false above.
       // Remove the last item from parts.
-      parts.pop_back();
+      remaining.remove_suffix(1);
     }

     // Let last be the last item in parts.
-    std::string_view last = parts.back();
+    const size_t last_dot = remaining.rfind('.');
+    const std::string_view last =
+        last_dot == std::string_view::npos ? remaining : remaining.substr(last_dot + 1);

     // If last is non-empty and contains only ASCII digits, then return true.
-    if (!last.empty() && std::all_of(last.begin(), last.end(), ::isdigit)) {
+    if (!last.empty() && last.find_first_not_of("0123456789") == std::string_view::npos) {
       return true;
     }

     // If parsing last as an IPv4 number does not return failure, then return true.
-    return ada::parser::parse_ipv4_number(last).has_value();
+    return is_ipv4_number_valid(last);
   }

   // A Windows drive letter is two code points, of which the first is an ASCII alpha
@@ -48,4 +46,29 @@ namespace ada::checkers {
     return is_windows_drive_letter(input) && input[1] == ':';
   }

+  // Returns true when input has the shape of an IPv4 number: "0x"/"0X" followed
+  // by hexadecimal digits, "0" followed by octal digits, or decimal digits only.
+  bool is_ipv4_number_valid(const std::string_view input) noexcept {
+    // If input is the empty string, then return failure.
+    if (input.empty()) {
+      return false;
+    }
+
+    // If input contains at least two code points and the first two code points
+    // are either "0X" or "0x", then the remainder must be hexadecimal digits
+    // (an empty remainder is accepted).
+    if (input.length() >= 2 && input[0] == '0' && (input[1] == 'X' || input[1] == 'x')) {
+      return input.find_first_not_of("0123456789abcdefABCDEF", 2) == std::string_view::npos;
+    }
+
+    // Otherwise, if the first code point is U+0030 (0), then the remainder must
+    // be octal digits (a lone "0" is accepted).
+    if (input[0] == '0') {
+      return input.find_first_not_of("01234567", 1) == std::string_view::npos;
+    }
+
+    // Otherwise every code point must be a decimal digit.
+    return input.find_first_not_of("0123456789") == std::string_view::npos;
+  }
+
 } // namespace ada::checkers