diff --git a/deps/ada/ada.cpp b/deps/ada/ada.cpp index 1070ef6814f816..dc090542e3e1a5 100644 --- a/deps/ada/ada.cpp +++ b/deps/ada/ada.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2023-04-20 18:39:35 -0400. Do not edit! */ +/* auto-generated on 2023-04-26 16:43:37 -0400. Do not edit! */ /* begin file src/ada.cpp */ #include "ada.h" /* begin file src/checkers.cpp */ @@ -116,12 +116,13 @@ ada_really_inline constexpr bool verify_dns_length( ADA_PUSH_DISABLE_ALL_WARNINGS /* begin file src/ada_idna.cpp */ -/* auto-generated on 2023-03-28 11:03:13 -0400. Do not edit! */ +/* auto-generated on 2023-04-26 14:14:42 -0400. Do not edit! */ /* begin file src/idna.cpp */ /* begin file src/unicode_transcoding.cpp */ #include #include + namespace ada::idna { size_t utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) { @@ -2750,7 +2751,9 @@ uint32_t find_range_index(uint32_t key) { } bool ascii_has_upper_case(char* input, size_t length) { - auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + auto broadcast = [](uint8_t v) -> uint64_t { + return 0x101010101010101ull * v; + }; uint64_t broadcast_80 = broadcast(0x80); uint64_t broadcast_Ap = broadcast(128 - 'A'); uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1); @@ -2772,7 +2775,9 @@ bool ascii_has_upper_case(char* input, size_t length) { } void ascii_map(char* input, size_t length) { - auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + auto broadcast = [](uint8_t v) -> uint64_t { + return 0x101010101010101ull * v; + }; uint64_t broadcast_80 = broadcast(0x80); uint64_t broadcast_Ap = broadcast(128 - 'A'); uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1); @@ -7999,9 +8004,10 @@ const char32_t uninorms::decomposition_data[] = { namespace ada::idna { void normalize(std::u32string& input) { - // [Normalize](https://www.unicode.org/reports/tr46/#ProcessingStepNormalize). - // Normalize - // the domain_name string to Unicode Normalization Form C. 
+ /** + * Normalize the domain_name string to Unicode Normalization Form C. + * @see https://www.unicode.org/reports/tr46/#ProcessingStepNormalize + */ ufal::unilib::uninorms::nfc(input); } @@ -8229,7 +8235,6 @@ bool utf32_to_punycode(std::u32string_view input, std::string &out) { } // namespace ada::idna /* end file src/punycode.cpp */ /* begin file src/validity.cpp */ - #include #include @@ -9617,18 +9622,18 @@ constexpr static uint8_t is_forbidden_domain_code_point_table[] = { static_assert(sizeof(is_forbidden_domain_code_point_table) == 256); -inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept { +inline bool is_forbidden_domain_code_point(const char c) noexcept { return is_forbidden_domain_code_point_table[uint8_t(c)]; } -// We return "" on error. For now. -std::string from_ascii_to_ascii(std::string_view ut8_string) { - static const std::string error = ""; - if (std::any_of(ut8_string.begin(), ut8_string.end(), - is_forbidden_domain_code_point)) { - return error; - } +bool contains_forbidden_domain_code_point(std::string_view view) { + return ( + std::any_of(view.begin(), view.end(), is_forbidden_domain_code_point)); +} +// We return "" on error. +static std::string from_ascii_to_ascii(std::string_view ut8_string) { + static const std::string error = ""; // copy and map // we could be more efficient by avoiding the copy when unnecessary. std::string mapped_string = std::string(ut8_string); @@ -9682,7 +9687,7 @@ std::string from_ascii_to_ascii(std::string_view ut8_string) { return out; } -// We return "" on error. For now. +// We return "" on error. 
std::string to_ascii(std::string_view ut8_string) { if (is_ascii(ut8_string)) { return from_ascii_to_ascii(ut8_string); @@ -9769,11 +9774,6 @@ std::string to_ascii(std::string_view ut8_string) { out.push_back('.'); } } - - if (std::any_of(out.begin(), out.end(), is_forbidden_domain_code_point)) { - return error; - } - return out; } } // namespace ada::idna @@ -9842,7 +9842,9 @@ ADA_POP_DISABLE_WARNINGS namespace ada::unicode { constexpr bool to_lower_ascii(char* input, size_t length) noexcept { - auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + auto broadcast = [](uint8_t v) -> uint64_t { + return 0x101010101010101ull * v; + }; uint64_t broadcast_80 = broadcast(0x80); uint64_t broadcast_Ap = broadcast(128 - 'A'); uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1); @@ -9873,7 +9875,9 @@ ada_really_inline constexpr bool has_tabs_or_newline( auto has_zero_byte = [](uint64_t v) { return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); }; - auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + auto broadcast = [](uint8_t v) -> uint64_t { + return 0x101010101010101ull * v; + }; size_t i = 0; uint64_t mask1 = broadcast('\r'); uint64_t mask2 = broadcast('\n'); @@ -10252,7 +10256,8 @@ bool to_ascii(std::optional& out, const std::string_view plain, } // input is a non-empty UTF-8 string, must be percent decoded std::string idna_ascii = ada::idna::to_ascii(input); - if (idna_ascii.empty()) { + if (idna_ascii.empty() || contains_forbidden_domain_code_point( + idna_ascii.data(), idna_ascii.size())) { return false; } out = std::move(idna_ascii); @@ -10627,7 +10632,9 @@ ada_really_inline size_t find_next_host_delimiter_special( auto index_of_first_set_byte = [](uint64_t v) { return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; }; - auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + auto broadcast = [](uint8_t v) -> uint64_t { + return 0x101010101010101ull * v; + }; size_t i 
= location; uint64_t mask1 = broadcast(':'); uint64_t mask2 = broadcast('/'); @@ -10690,7 +10697,9 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view, auto index_of_first_set_byte = [](uint64_t v) { return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; }; - auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + auto broadcast = [](uint8_t v) -> uint64_t { + return 0x101010101010101ull * v; + }; size_t i = location; uint64_t mask1 = broadcast(':'); uint64_t mask2 = broadcast('/'); @@ -11016,7 +11025,9 @@ find_authority_delimiter_special(std::string_view view) noexcept { auto index_of_first_set_byte = [](uint64_t v) { return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; }; - auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + auto broadcast = [](uint8_t v) -> uint64_t { + return 0x101010101010101ull * v; + }; size_t i = 0; uint64_t mask1 = broadcast('@'); uint64_t mask2 = broadcast('/'); @@ -11064,7 +11075,9 @@ find_authority_delimiter(std::string_view view) noexcept { auto index_of_first_set_byte = [](uint64_t v) { return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; }; - auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; }; + auto broadcast = [](uint8_t v) -> uint64_t { + return 0x101010101010101ull * v; + }; size_t i = 0; uint64_t mask1 = broadcast('@'); uint64_t mask2 = broadcast('/'); diff --git a/deps/ada/ada.h b/deps/ada/ada.h index 625194ddb423e9..296547569f9b44 100644 --- a/deps/ada/ada.h +++ b/deps/ada/ada.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-04-20 18:39:35 -0400. Do not edit! */ +/* auto-generated on 2023-04-26 16:43:37 -0400. Do not edit! */ /* begin file include/ada.h */ /** * @file ada.h @@ -8,7 +8,7 @@ #define ADA_H /* begin file include/ada/ada_idna.h */ -/* auto-generated on 2023-03-28 11:03:13 -0400. Do not edit! */ +/* auto-generated on 2023-04-26 14:14:42 -0400. Do not edit! 
*/ /* begin file include/idna.h */ #ifndef ADA_IDNA_H #define ADA_IDNA_H @@ -40,6 +40,7 @@ size_t utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output); #include #include + namespace ada::idna { // If the input is ascii, then the mapping is just -> lower case. @@ -59,6 +60,7 @@ std::u32string map(std::u32string_view input); #include #include + namespace ada::idna { // Normalize the characters according to IDNA (Unicode Normalization Form C). @@ -73,6 +75,7 @@ void normalize(std::u32string& input); #include #include + namespace ada::idna { bool punycode_to_utf32(std::string_view input, std::u32string& out); @@ -109,14 +112,24 @@ bool is_label_valid(const std::u32string_view label); #include namespace ada::idna { + // Converts a domain (e.g., www.google.com) possibly containing international // characters to an ascii domain (with punycode). It will not do percent // decoding: percent decoding should be done prior to calling this function. We // do not remove tabs and spaces, they should have been removed prior to calling // this function. We also do not trim control characters. We also assume that -// the input is not empty. We return "" on error. For now. +// the input is not empty. We return "" on error. +// +// Example: "www.öbb.at" -> "www.xn--bb-eka.at" +// +// This function may accept or even produce invalid domains. 
std::string to_ascii(std::string_view ut8_string); +// Returns true if the string contains a forbidden code point according to the +// WHATWG URL specification: +// https://url.spec.whatwg.org/#forbidden-domain-code-point +bool contains_forbidden_domain_code_point(std::string_view ascii_string); + bool constexpr begins_with(std::u32string_view view, std::u32string_view prefix); bool constexpr begins_with(std::string_view view, std::string_view prefix); @@ -124,8 +137,6 @@ bool constexpr begins_with(std::string_view view, std::string_view prefix); bool constexpr is_ascii(std::u32string_view view); bool constexpr is_ascii(std::string_view view); -std::string from_ascii_to_ascii(std::string_view ut8_string); - } // namespace ada::idna #endif // ADA_IDNA_TO_ASCII_H @@ -135,8 +146,12 @@ std::string from_ascii_to_ascii(std::string_view ut8_string); #ifndef ADA_IDNA_TO_UNICODE_H #define ADA_IDNA_TO_UNICODE_H +#include + namespace ada::idna { + std::string to_unicode(std::string_view input); + } // namespace ada::idna #endif // ADA_IDNA_TO_UNICODE_H @@ -5138,6 +5153,7 @@ struct url : url_base { [[nodiscard]] inline bool has_hash() const noexcept override; /** @return true if the URL has a search component */ [[nodiscard]] inline bool has_search() const noexcept override; + private: friend ada::url ada::parser::parse_url(std::string_view, const ada::url *); @@ -5305,8 +5321,7 @@ namespace ada { [[nodiscard]] ada_really_inline bool url::has_credentials() const noexcept { return !username.empty() || !password.empty(); } -[[nodiscard]] ada_really_inline bool url::has_port() - const noexcept { +[[nodiscard]] ada_really_inline bool url::has_port() const noexcept { return port.has_value(); } [[nodiscard]] inline bool url::cannot_have_credentials_or_port() const { @@ -5439,9 +5454,13 @@ inline void url::clear_pathname() { path.clear(); } inline void url::clear_search() { query = std::nullopt; } -[[nodiscard]] inline bool url::has_hash() const noexcept { return hash.has_value(); } 
+[[nodiscard]] inline bool url::has_hash() const noexcept { + return hash.has_value(); +} -[[nodiscard]] inline bool url::has_search() const noexcept { return query.has_value(); } +[[nodiscard]] inline bool url::has_search() const noexcept { + return query.has_value(); +} inline void url::set_protocol_as_file() { type = ada::scheme::type::FILE; } @@ -6456,13 +6475,13 @@ inline std::ostream &operator<<(std::ostream &out, #ifndef ADA_ADA_VERSION_H #define ADA_ADA_VERSION_H -#define ADA_VERSION "2.2.0" +#define ADA_VERSION "2.3.0" namespace ada { enum { ADA_VERSION_MAJOR = 2, - ADA_VERSION_MINOR = 2, + ADA_VERSION_MINOR = 3, ADA_VERSION_REVISION = 0, };