diff --git a/deps/ada/ada.cpp b/deps/ada/ada.cpp index 84dd6db3e1ed78..e0ee4a508dd7e9 100644 --- a/deps/ada/ada.cpp +++ b/deps/ada/ada.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2023-05-08 12:41:03 -0400. Do not edit! */ +/* auto-generated on 2023-05-16 13:48:47 -0400. Do not edit! */ /* begin file src/ada.cpp */ #include "ada.h" /* begin file src/checkers.cpp */ @@ -9786,6 +9786,11 @@ std::string to_unicode(std::string_view input) { ADA_POP_DISABLE_WARNINGS #include +#if ADA_NEON +#include +#elif ADA_SSE2 +#include +#endif namespace ada::unicode { @@ -9817,8 +9822,58 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept { } return non_ascii == 0; } - -ada_really_inline constexpr bool has_tabs_or_newline( +#if ADA_NEON +ada_really_inline bool has_tabs_or_newline( + std::string_view user_input) noexcept { + size_t i = 0; + const uint8x16_t mask1 = vmovq_n_u8('\r'); + const uint8x16_t mask2 = vmovq_n_u8('\n'); + const uint8x16_t mask3 = vmovq_n_u8('\t'); + uint8x16_t running{0}; + for (; i + 15 < user_input.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t*)user_input.data() + i); + running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1), + vceqq_u8(word, mask2))), + vceqq_u8(word, mask3)); + } + if (i < user_input.size()) { + uint8_t buffer[16]{}; + memcpy(buffer, user_input.data() + i, user_input.size() - i); + uint8x16_t word = vld1q_u8((const uint8_t*)user_input.data() + i); + running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1), + vceqq_u8(word, mask2))), + vceqq_u8(word, mask3)); + } + return vmaxvq_u8(running) != 0; +} +#elif ADA_SSE2 +ada_really_inline bool has_tabs_or_newline( + std::string_view user_input) noexcept { + size_t i = 0; + const __m128i mask1 = _mm_set1_epi8('\r'); + const __m128i mask2 = _mm_set1_epi8('\n'); + const __m128i mask3 = _mm_set1_epi8('\t'); + __m128i running{0}; + for (; i + 15 < user_input.size(); i += 16) { + __m128i word = _mm_loadu_si128((const __m128i*)(user_input.data() + i)); + running = _mm_or_si128( + _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1), + _mm_cmpeq_epi8(word, mask2))), + _mm_cmpeq_epi8(word, mask3)); + } + if (i < user_input.size()) { + uint8_t buffer[16]{}; + memcpy(buffer, user_input.data() + i, user_input.size() - i); + __m128i word = _mm_loadu_si128((const __m128i*)buffer); + running = _mm_or_si128( + _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1), + _mm_cmpeq_epi8(word, mask2))), + _mm_cmpeq_epi8(word, mask3)); + } + return _mm_movemask_epi8(running) != 0; +} +#else +ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept { auto has_zero_byte = [](uint64_t v) { return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); @@ -9849,6 +9904,7 @@ ada_really_inline constexpr bool has_tabs_or_newline( } return running; } +#endif // A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR, // U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>), @@ -13732,8 +13788,11 @@ bool url_aggregator::set_hostname(const std::string_view input) { [[nodiscard]] std::string_view url_aggregator::get_host() const noexcept { ada_log("url_aggregator::get_host"); + // Technically, we should check if there is a hostname, but + // the code below works even if there isn't. + // if(!has_hostname()) { return ""; } size_t start = components.host_start; - if (buffer.size() > components.host_start && + if (components.host_end > components.host_start && buffer[components.host_start] == '@') { start++; } @@ -13747,9 +13806,12 @@ bool url_aggregator::set_hostname(const std::string_view input) { [[nodiscard]] std::string_view url_aggregator::get_hostname() const noexcept { ada_log("url_aggregator::get_hostname"); + // Technically, we should check if there is a hostname, but + // the code below works even if there isn't. + // if(!has_hostname()) { return ""; } size_t start = components.host_start; // So host_start is not where the host begins. - if (buffer.size() > components.host_start && + if (components.host_end > components.host_start && buffer[components.host_start] == '@') { start++; } @@ -14807,17 +14869,32 @@ struct ada_url_components { uint32_t hash_start; }; -ada_url ada_parse(const char* input) noexcept { +ada_url ada_parse(const char* input, size_t length) noexcept { return new ada::result( - ada::parse(input)); + ada::parse(std::string_view(input, length))); } -bool ada_can_parse(const char* input, const char* base) noexcept { - if (base == nullptr) { - return ada::can_parse(input); +ada_url ada_parse_with_base(const char* input, size_t input_length, + const char* base, size_t base_length) noexcept { + auto base_out = + ada::parse(std::string_view(base, base_length)); + + if (!base_out) { + return new ada::result(base_out); } - std::string_view sv(base); - return ada::can_parse(input, &sv); + + return new ada::result(ada::parse( + std::string_view(input, input_length), &base_out.value())); +} + +bool ada_can_parse(const char* input, size_t length) noexcept { + return ada::can_parse(std::string_view(input, length)); +} + +bool ada_can_parse_with_base(const char* input, size_t input_length, + const char* base, size_t base_length) noexcept { + auto base_view = std::string_view(base, base_length); + return ada::can_parse(std::string_view(input, input_length), &base_view); } void ada_free(ada_url result) noexcept { @@ -14943,81 +15020,86 @@ ada_string ada_get_protocol(ada_url result) noexcept { return ada_string_create(out.data(), out.length()); } -bool ada_set_href(ada_url result, const char* input) noexcept { +bool ada_set_href(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { return false; } - return r->set_href(input); + return r->set_href(std::string_view(input, length)); } -bool ada_set_host(ada_url result, const char* input) noexcept { +bool ada_set_host(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { return false; } - return r->set_host(input); + return r->set_host(std::string_view(input, length)); } -bool ada_set_hostname(ada_url result, const char* input) noexcept { +bool ada_set_hostname(ada_url result, const char* input, + size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { return false; } - return r->set_hostname(input); + return r->set_hostname(std::string_view(input, length)); } -bool ada_set_protocol(ada_url result, const char* input) noexcept { +bool ada_set_protocol(ada_url result, const char* input, + size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { return false; } - return r->set_protocol(input); + return r->set_protocol(std::string_view(input, length)); } -bool ada_set_username(ada_url result, const char* input) noexcept { +bool ada_set_username(ada_url result, const char* input, + size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { return false; } - return r->set_username(input); + return r->set_username(std::string_view(input, length)); } -bool ada_set_password(ada_url result, const char* input) noexcept { +bool ada_set_password(ada_url result, const char* input, + size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { return false; } - return r->set_password(input); + return r->set_password(std::string_view(input, length)); } -bool ada_set_port(ada_url result, const char* input) noexcept { +bool ada_set_port(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { return false; } - return r->set_port(input); + return r->set_port(std::string_view(input, length)); } -bool ada_set_pathname(ada_url result, const char* input) noexcept { +bool ada_set_pathname(ada_url result, const char* input, + size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { return false; } - return r->set_pathname(input); + return r->set_pathname(std::string_view(input, length)); } -void ada_set_search(ada_url result, const char* input) noexcept { +void ada_set_search(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (r) { - r->set_search(input); + r->set_search(std::string_view(input, length)); } } -void ada_set_hash(ada_url result, const char* input) noexcept { +void ada_set_hash(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (r) { - r->set_hash(input); + r->set_hash(std::string_view(input, length)); } } diff --git a/deps/ada/ada.h b/deps/ada/ada.h index 9f0df05e152e14..5afa7abc62d8f5 100644 --- a/deps/ada/ada.h +++ b/deps/ada/ada.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-05-08 12:41:03 -0400. Do not edit! */ +/* auto-generated on 2023-05-16 13:48:47 -0400. Do not edit! */ /* begin file include/ada.h */ /** * @file ada.h @@ -468,6 +468,17 @@ namespace ada { if (!(COND)) __builtin_unreachable(); \ } while (0) #endif + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#define ADA_SSE2 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define ADA_NEON 1 +#endif + #endif // ADA_COMMON_DEFS_H /* end file include/ada/common_defs.h */ #include @@ -4320,7 +4331,7 @@ std::string to_unicode(std::string_view input); * @attention The has_tabs_or_newline function is a bottleneck and it is simple * enough that compilers like GCC can 'autovectorize it'. */ -ada_really_inline constexpr bool has_tabs_or_newline( +ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept; /** @@ -6473,14 +6484,14 @@ inline std::ostream &operator<<(std::ostream &out, #ifndef ADA_ADA_VERSION_H #define ADA_ADA_VERSION_H -#define ADA_VERSION "2.4.0" +#define ADA_VERSION "2.4.1" namespace ada { enum { ADA_VERSION_MAJOR = 2, ADA_VERSION_MINOR = 4, - ADA_VERSION_REVISION = 0, + ADA_VERSION_REVISION = 1, }; } // namespace ada @@ -6508,11 +6519,11 @@ using result = tl::expected; /** * The URL parser takes a scalar value string input, with an optional null or - * base URL base (default null). The parser assumes the input has an UTF-8 - * encoding. + * base URL base (default null). The parser assumes the input is a valid ASCII + * or UTF-8 string. * - * @param input the string input to analyze. - * @param base_url the optional string input to use as a base url. + * @param input the string input to analyze (must be valid ASCII or UTF-8) + * @param base_url the optional URL input to use as a base url. * @return a parsed URL. */ template @@ -6525,6 +6536,8 @@ extern template ada::result parse( std::string_view input, const url_aggregator* base_url); /** + * Verifies whether the URL strings can be parsed. The function assumes + * that the inputs are valid ASCII or UTF-8 strings. * @see https://url.spec.whatwg.org/#dom-url-canparse * @return If URL can be parsed or not. */ @@ -6532,7 +6545,8 @@ bool can_parse(std::string_view input, const std::string_view* base_input = nullptr); /** - * Computes a href string from a file path. + * Computes a href string from a file path. The function assumes + * that the input is a valid ASCII or UTF-8 string. * @return a href string (starts with file:://) */ std::string href_from_file(std::string_view path); diff --git a/deps/ada/ada_c.h b/deps/ada/ada_c.h index 4c64a5aece5181..f8bcbdcd14d161 100644 --- a/deps/ada/ada_c.h +++ b/deps/ada/ada_c.h @@ -9,6 +9,10 @@ #include #include +// This is a reference to ada::url_components::omitted +// It represents "uint32_t(-1)" +#define ada_url_omitted 0xffffffff + // string that is owned by the ada_url instance typedef struct { const char* data; @@ -34,21 +38,25 @@ typedef struct { typedef void* ada_url; -// input should be a null terminated C string +// input should be a null terminated C string (ASCII or UTF-8) // you must call ada_free on the returned pointer -ada_url ada_parse(const char* string); +ada_url ada_parse(const char* input, size_t length); +ada_url ada_parse_with_base(const char* input, size_t input_length, + const char* base, size_t base_length); // input and base should be a null terminated C strings -bool ada_can_parse(const char* input, const char* base); +bool ada_can_parse(const char* input, size_t length); +bool ada_can_parse_with_base(const char* input, size_t input_length, + const char* base, size_t base_length); void ada_free(ada_url result); +void ada_free_owned_string(ada_owned_string owned); bool ada_is_valid(ada_url result); // url_aggregator getters // if ada_is_valid(result)) is false, an empty string is returned ada_owned_string ada_get_origin(ada_url result); -void ada_free_owned_string(ada_owned_string owned); ada_string ada_get_href(ada_url result); ada_string ada_get_username(ada_url result); ada_string ada_get_password(ada_url result); @@ -63,16 +71,16 @@ ada_string ada_get_protocol(ada_url result); // url_aggregator setters // if ada_is_valid(result)) is false, the setters have no effect // input should be a null terminated C string -bool ada_set_href(ada_url result, const char* input); -bool ada_set_host(ada_url result, const char* input); -bool ada_set_hostname(ada_url result, const char* input); -bool ada_set_protocol(ada_url result, const char* input); -bool ada_set_username(ada_url result, const char* input); -bool ada_set_password(ada_url result, const char* input); -bool ada_set_port(ada_url result, const char* input); -bool ada_set_pathname(ada_url result, const char* input); -void ada_set_search(ada_url result, const char* input); -void ada_set_hash(ada_url result, const char* input); +bool ada_set_href(ada_url result, const char* input, size_t length); +bool ada_set_host(ada_url result, const char* input, size_t length); +bool ada_set_hostname(ada_url result, const char* input, size_t length); +bool ada_set_protocol(ada_url result, const char* input, size_t length); +bool ada_set_username(ada_url result, const char* input, size_t length); +bool ada_set_password(ada_url result, const char* input, size_t length); +bool ada_set_port(ada_url result, const char* input, size_t length); +bool ada_set_pathname(ada_url result, const char* input, size_t length); +void ada_set_search(ada_url result, const char* input, size_t length); +void ada_set_hash(ada_url result, const char* input, size_t length); // url_aggregator functions // if ada_is_valid(result) is false, functions below will return false