Skip to content

Commit

Permalink
perf: improve ends_in_a_number by 25%
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Jan 6, 2023
1 parent dcac59b commit 8d1ddc5
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 11 deletions.
1 change: 1 addition & 0 deletions include/ada/checkers.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace ada::checkers {
// Reports whether the last dot-separated part of `input` (after discarding one
// trailing '.') is non-empty and is either made only of ASCII digits or is a
// syntactically valid IPv4 number.
bool ends_in_a_number(std::string_view input) noexcept;
// Presumably tests whether `input` is a Windows drive letter (two code points,
// the first an ASCII alpha); the definition is not visible here -- confirm.
bool is_windows_drive_letter(std::string_view input) noexcept;
// Presumably a Windows drive letter whose second code point is ':' -- confirm
// against the definition in src/checkers.cpp.
bool is_normalized_windows_drive_letter(std::string_view input) noexcept;
// Syntax-only check of an IPv4 number: "0x"/"0X" followed by hexadecimal digits,
// a leading "0" followed by octal digits, or decimal digits otherwise. No
// numeric value is produced. The definition assumes `input` is not empty.
// NOTE(review): a constexpr function is implicitly inline, so its definition
// must be visible in every translation unit that uses it; this one is defined
// in src/checkers.cpp -- confirm no other translation unit calls it.
ada_really_inline constexpr bool is_ipv4_number_valid(const std::string_view input) noexcept;

} // namespace ada::checkers

Expand Down
68 changes: 57 additions & 11 deletions src/checkers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,55 @@ namespace ada::checkers {

// Reports whether the host string `input` ends in a number.
//
// `input` is treated as a list of parts separated by U+002E (.). A single
// trailing dot is ignored. The result is true when the last remaining part is
// non-empty and either consists solely of ASCII digits or is a syntactically
// valid IPv4 number (see is_ipv4_number_valid). Empty input, ".", and inputs
// whose last part is empty (for example "1..") yield false.
//
// The function works on views into `input` only: it performs no allocation and
// cannot throw, which is what the noexcept specifier promises.
bool ends_in_a_number(const std::string_view input) noexcept {
  if (input.empty()) {
    return false;
  }

  // If the last item in parts is the empty string (input ends with '.'),
  // remove that item. If nothing is left, there is no last part to inspect.
  std::string_view remaining = input;
  if (remaining.back() == '.') {
    remaining.remove_suffix(1);
    if (remaining.empty()) {
      return false;
    }
  }

  // Let last be the last item in parts: everything after the final '.', or the
  // whole remaining string when it contains no '.' at all.
  const size_t last_dot = remaining.rfind('.');
  const std::string_view last =
      (last_dot == std::string_view::npos) ? remaining : remaining.substr(last_dot + 1);

  if (last.empty()) {
    return false;
  }

  // If last is non-empty and contains only ASCII digits, then return true.
  // This check must come first: "09" is all digits (true) even though it is
  // not a valid octal IPv4 number. A character-set search is used instead of
  // ::isdigit, which has undefined behaviour for negative char values.
  if (last.find_first_not_of("0123456789") == std::string_view::npos) {
    return true;
  }

  // If parsing last as an IPv4 number does not return failure, then return true.
  return is_ipv4_number_valid(last);
}

// A Windows drive letter is two code points, of which the first is an ASCII alpha
Expand All @@ -48,5 +69,30 @@ namespace ada::checkers {
return is_windows_drive_letter(input) && input[1] == ':';
}

// Checks whether `input` is syntactically a valid IPv4 number, without
// computing its value:
//   - if it starts with "0x" or "0X", every following code point must be a
//     hexadecimal digit (a bare "0x" is accepted);
//   - otherwise, if it starts with '0', every following code point must be an
//     octal digit (a bare "0" is accepted);
//   - otherwise every code point must be a decimal digit.
// Returns false for empty input. No range check on the resulting number is
// performed here.
//
// Only std::string_view searches are used, so the function is usable in
// constant expressions and never passes a (possibly negative) char to
// ::isdigit.
ada_really_inline constexpr bool is_ipv4_number_valid(const std::string_view input) noexcept {
  // An empty string is not a number; this also protects the input[0] reads below.
  if (input.empty()) {
    return false;
  }

  // The first two code points are either "0X" or "0x": radix 16.
  if (input.length() >= 2 && input[0] == '0' && (input[1] == 'X' || input[1] == 'x')) {
    // Searching from offset 2 skips the prefix; when nothing follows it the
    // search returns npos, so "0x" alone is valid.
    return input.find_first_not_of("0123456789abcdefABCDEF", 2) == std::string_view::npos;
  }

  // Otherwise, if the first code point is U+0030 (0): radix 8.
  if (input[0] == '0') {
    // Searching from offset 1 skips the leading zero; "0" alone is valid.
    return input.find_first_not_of("01234567", 1) == std::string_view::npos;
  }

  // Radix 10: every code point must be an ASCII digit.
  return input.find_first_not_of("0123456789") == std::string_view::npos;
}

} // namespace ada::checkers

0 comments on commit 8d1ddc5

Please sign in to comment.