From eb54e0429b145497f8a060cdfef944ccf025682f Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 18 Sep 2024 14:02:38 -0400 Subject: [PATCH] moving functions around --- include/ada.h | 1 + include/ada/url-inl.h | 4 + include/ada/url_aggregator-inl.h | 192 ++++++++++++++++++++++++++++++- include/ada/url_aggregator.h | 3 +- include/ada/url_base-inl.h | 3 +- include/ada/url_components-inl.h | 88 ++++++++++++++ include/ada/url_components.h | 1 - src/url-getters.cpp | 4 - src/url_aggregator.cpp | 186 +----------------------------- src/url_components.cpp | 75 ------------ 10 files changed, 287 insertions(+), 270 deletions(-) create mode 100644 include/ada/url_components-inl.h diff --git a/include/ada.h b/include/ada.h index 667c7fcaf..c5d0946ec 100644 --- a/include/ada.h +++ b/include/ada.h @@ -21,6 +21,7 @@ #include "ada/url_base-inl.h" #include "ada/url-inl.h" #include "ada/url_components.h" +#include "ada/url_components-inl.h" #include "ada/url_aggregator.h" #include "ada/url_aggregator-inl.h" #include "ada/url_search_params.h" diff --git a/include/ada/url-inl.h b/include/ada/url-inl.h index 361717929..863e6c5e7 100644 --- a/include/ada/url-inl.h +++ b/include/ada/url-inl.h @@ -43,6 +43,10 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u) { return path.size(); } +[[nodiscard]] constexpr std::string_view url::get_pathname() const noexcept { + return path; +} + [[nodiscard]] ada_really_inline ada::url_components url::get_components() const noexcept { url_components out{}; diff --git a/include/ada/url_aggregator-inl.h b/include/ada/url_aggregator-inl.h index 5826221ac..483214a75 100644 --- a/include/ada/url_aggregator-inl.h +++ b/include/ada/url_aggregator-inl.h @@ -732,7 +732,8 @@ url_aggregator::get_components() const noexcept { return components; } -[[nodiscard]] constexpr bool ada::url_aggregator::has_authority() const noexcept { +[[nodiscard]] constexpr bool ada::url_aggregator::has_authority() + const noexcept { ada_log("url_aggregator::has_authority"); // Performance: instead of doing this potentially expensive check, we could // have a boolean in the struct. @@ -844,8 +845,8 @@ constexpr bool url_aggregator::has_port() const noexcept { buffer[components.host_end + 1] == '.'; } -[[nodiscard]] constexpr std::string_view url_aggregator::get_href() const noexcept - ada_lifetime_bound { +[[nodiscard]] constexpr std::string_view url_aggregator::get_href() + const noexcept ada_lifetime_bound { ada_log("url_aggregator::get_href"); return buffer; } @@ -919,6 +920,191 @@ constexpr void url_aggregator::set_protocol_as_file() { ADA_ASSERT_TRUE(validate()); } +[[nodiscard]] constexpr bool url_aggregator::validate() const noexcept { + if (!is_valid) { + return true; + } + if (!components.check_offset_consistency()) { + ada_log("url_aggregator::validate inconsistent components \n", + to_diagram()); + return false; + } + // We have a credible components struct, but let us investivate more + // carefully: + /** + * https://user:pass@example.com:1234/foo/bar?baz#quux + * | | | | ^^^^| | | + * | | | | | | | `----- hash_start + * | | | | | | `--------- search_start + * | | | | | `----------------- pathname_start + * | | | | `--------------------- port + * | | | `----------------------- host_end + * | | `---------------------------------- host_start + * | `--------------------------------------- username_end + * `--------------------------------------------- protocol_end + */ + if (components.protocol_end == url_components::omitted) { + ada_log("url_aggregator::validate omitted protocol_end \n", to_diagram()); + return false; + } + if (components.username_end == url_components::omitted) { + ada_log("url_aggregator::validate omitted username_end \n", to_diagram()); + return false; + } + if (components.host_start == url_components::omitted) { + ada_log("url_aggregator::validate omitted host_start \n", to_diagram()); + return false; + } + if (components.host_end == url_components::omitted) { + ada_log("url_aggregator::validate omitted host_end \n", to_diagram()); + return false; + } + if (components.pathname_start == url_components::omitted) { + ada_log("url_aggregator::validate omitted pathname_start \n", to_diagram()); + return false; + } + + if (components.protocol_end > buffer.size()) { + ada_log("url_aggregator::validate protocol_end overflow \n", to_diagram()); + return false; + } + if (components.username_end > buffer.size()) { + ada_log("url_aggregator::validate username_end overflow \n", to_diagram()); + return false; + } + if (components.host_start > buffer.size()) { + ada_log("url_aggregator::validate host_start overflow \n", to_diagram()); + return false; + } + if (components.host_end > buffer.size()) { + ada_log("url_aggregator::validate host_end overflow \n", to_diagram()); + return false; + } + if (components.pathname_start > buffer.size()) { + ada_log("url_aggregator::validate pathname_start overflow \n", + to_diagram()); + return false; + } + + if (components.protocol_end > 0) { + if (buffer[components.protocol_end - 1] != ':') { + ada_log( + "url_aggregator::validate missing : at the end of the protocol \n", + to_diagram()); + return false; + } + } + + if (components.username_end != buffer.size() && + components.username_end > components.protocol_end + 2) { + if (buffer[components.username_end] != ':' && + buffer[components.username_end] != '@') { + ada_log( + "url_aggregator::validate missing : or @ at the end of the username " + "\n", + to_diagram()); + return false; + } + } + + if (components.host_start != buffer.size()) { + if (components.host_start > components.username_end) { + if (buffer[components.host_start] != '@') { + ada_log( + "url_aggregator::validate missing @ at the end of the password \n", + to_diagram()); + return false; + } + } else if (components.host_start == components.username_end && + components.host_end > components.host_start) { + if (components.host_start == components.protocol_end + 2) { + if (buffer[components.protocol_end] != '/' || + buffer[components.protocol_end + 1] != '/') { + ada_log( + "url_aggregator::validate missing // between protocol and host " + "\n", + to_diagram()); + return false; + } + } else { + if (components.host_start > components.protocol_end && + buffer[components.host_start] != '@') { + ada_log( + "url_aggregator::validate missing @ at the end of the username " + "\n", + to_diagram()); + return false; + } + } + } else { + if (components.host_end != components.host_start) { + ada_log("url_aggregator::validate expected omitted host \n", + to_diagram()); + return false; + } + } + } + if (components.host_end != buffer.size() && + components.pathname_start > components.host_end) { + if (components.pathname_start == components.host_end + 2 && + buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.') { + if (components.pathname_start + 1 >= buffer.size() || + buffer[components.pathname_start] != '/' || + buffer[components.pathname_start + 1] != '/') { + ada_log( + "url_aggregator::validate expected the path to begin with // \n", + to_diagram()); + return false; + } + } else if (buffer[components.host_end] != ':') { + ada_log("url_aggregator::validate missing : at the port \n", + to_diagram()); + return false; + } + } + if (components.pathname_start != buffer.size() && + components.pathname_start < components.search_start && + components.pathname_start < components.hash_start && !has_opaque_path) { + if (buffer[components.pathname_start] != '/') { + ada_log("url_aggregator::validate missing / at the path \n", + to_diagram()); + return false; + } + } + if (components.search_start != url_components::omitted) { + if (buffer[components.search_start] != '?') { + ada_log("url_aggregator::validate missing ? at the search \n", + to_diagram()); + return false; + } + } + if (components.hash_start != url_components::omitted) { + if (buffer[components.hash_start] != '#') { + ada_log("url_aggregator::validate missing # at the hash \n", + to_diagram()); + return false; + } + } + + return true; +} + +[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname() + const noexcept ada_lifetime_bound { + ada_log("url_aggregator::get_pathname pathname_start = ", + components.pathname_start, " buffer.size() = ", buffer.size(), + " components.search_start = ", components.search_start, + " components.hash_start = ", components.hash_start); + auto ending_index = uint32_t(buffer.size()); + if (components.search_start != url_components::omitted) { + ending_index = components.search_start; + } else if (components.hash_start != url_components::omitted) { + ending_index = components.hash_start; + } + return helpers::substring(buffer, components.pathname_start, ending_index); +} + inline std::ostream &operator<<(std::ostream &out, const ada::url_aggregator &u) { return out << u.to_string(); diff --git a/include/ada/url_aggregator.h b/include/ada/url_aggregator.h index d64558e2b..8c71a7395 100644 --- a/include/ada/url_aggregator.h +++ b/include/ada/url_aggregator.h @@ -144,7 +144,8 @@ struct url_aggregator : url_base { * A URL includes credentials if its username or password is not the empty * string. */ - [[nodiscard]] ada_really_inline constexpr bool has_credentials() const noexcept; + [[nodiscard]] ada_really_inline constexpr bool has_credentials() + const noexcept; /** * Useful for implementing efficient serialization for the URL. diff --git a/include/ada/url_base-inl.h b/include/ada/url_base-inl.h index 05fa875fa..948205eb2 100644 --- a/include/ada/url_base-inl.h +++ b/include/ada/url_base-inl.h @@ -21,7 +21,8 @@ namespace ada { -[[nodiscard]] ada_really_inline constexpr bool url_base::is_special() const noexcept { +[[nodiscard]] ada_really_inline constexpr bool url_base::is_special() + const noexcept { return type != ada::scheme::NOT_SPECIAL; } diff --git a/include/ada/url_components-inl.h b/include/ada/url_components-inl.h new file mode 100644 index 000000000..8af91ff9f --- /dev/null +++ b/include/ada/url_components-inl.h @@ -0,0 +1,88 @@ +/** + * @file url_components.h + * @brief Declaration for the URL Components + */ +#ifndef ADA_URL_COMPONENTS_INL_H +#define ADA_URL_COMPONENTS_INL_H + +#include "ada/url_components.h" + +namespace ada { + +[[nodiscard]] constexpr bool url_components::check_offset_consistency() const noexcept { + /** + * https://user:pass@example.com:1234/foo/bar?baz#quux + * | | | | ^^^^| | | + * | | | | | | | `----- hash_start + * | | | | | | `--------- search_start + * | | | | | `----------------- pathname_start + * | | | | `--------------------- port + * | | | `----------------------- host_end + * | | `---------------------------------- host_start + * | `--------------------------------------- username_end + * `--------------------------------------------- protocol_end + */ + // These conditions can be made more strict. + uint32_t index = 0; + + if (protocol_end == url_components::omitted) { + return false; + } + if (protocol_end < index) { + return false; + } + index = protocol_end; + + if (username_end == url_components::omitted) { + return false; + } + if (username_end < index) { + return false; + } + index = username_end; + + if (host_start == url_components::omitted) { + return false; + } + if (host_start < index) { + return false; + } + index = host_start; + + if (port != url_components::omitted) { + if (port > 0xffff) { + return false; + } + uint32_t port_length = helpers::fast_digit_count(port) + 1; + if (index + port_length < index) { + return false; + } + index += port_length; + } + + if (pathname_start == url_components::omitted) { + return false; + } + if (pathname_start < index) { + return false; + } + index = pathname_start; + + if (search_start != url_components::omitted) { + if (search_start < index) { + return false; + } + index = search_start; + } + + if (hash_start != url_components::omitted) { + if (hash_start < index) { + return false; + } + } + + return true; +} + +} // namespace ada +#endif diff --git a/include/ada/url_components.h b/include/ada/url_components.h index 86d6acee2..a72767bb0 100644 --- a/include/ada/url_components.h +++ b/include/ada/url_components.h @@ -75,6 +75,5 @@ struct url_components { [[nodiscard]] std::string to_string() const; }; // struct url_components - } // namespace ada #endif diff --git a/src/url-getters.cpp b/src/url-getters.cpp index c1289b4f3..54a7b2a34 100644 --- a/src/url-getters.cpp +++ b/src/url-getters.cpp @@ -62,10 +62,6 @@ namespace ada { return host.value_or(""); } -[[nodiscard]] constexpr std::string_view url::get_pathname() const noexcept { - return path; -} - [[nodiscard]] std::string url::get_search() const noexcept { // If this's URL's query is either null or the empty string, then return the // empty string. Return U+003F (?), followed by this's URL's query. diff --git a/src/url_aggregator.cpp b/src/url_aggregator.cpp index a9ade551c..515362af1 100644 --- a/src/url_aggregator.cpp +++ b/src/url_aggregator.cpp @@ -5,6 +5,7 @@ #include "ada/scheme.h" #include "ada/unicode-inl.h" #include "ada/url_components.h" +#include "ada/url_components-inl.h" #include "ada/url_aggregator.h" #include "ada/url_aggregator-inl.h" @@ -723,21 +724,6 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, start, components.host_end); } -[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname() const noexcept - ada_lifetime_bound { - ada_log("url_aggregator::get_pathname pathname_start = ", - components.pathname_start, " buffer.size() = ", buffer.size(), - " components.search_start = ", components.search_start, - " components.hash_start = ", components.hash_start); - auto ending_index = uint32_t(buffer.size()); - if (components.search_start != url_components::omitted) { - ending_index = components.search_start; - } else if (components.hash_start != url_components::omitted) { - ending_index = components.hash_start; - } - return helpers::substring(buffer, components.pathname_start, ending_index); -} - [[nodiscard]] std::string_view url_aggregator::get_search() const noexcept ada_lifetime_bound { ada_log("url_aggregator::get_search"); @@ -1377,176 +1363,6 @@ bool url_aggregator::parse_opaque_host(std::string_view input) { return answer; } -[[nodiscard]] constexpr bool url_aggregator::validate() const noexcept { - if (!is_valid) { - return true; - } - if (!components.check_offset_consistency()) { - ada_log("url_aggregator::validate inconsistent components \n", - to_diagram()); - return false; - } - // We have a credible components struct, but let us investivate more - // carefully: - /** - * https://user:pass@example.com:1234/foo/bar?baz#quux - * | | | | ^^^^| | | - * | | | | | | | `----- hash_start - * | | | | | | `--------- search_start - * | | | | | `----------------- pathname_start - * | | | | `--------------------- port - * | | | `----------------------- host_end - * | | `---------------------------------- host_start - * | `--------------------------------------- username_end - * `--------------------------------------------- protocol_end - */ - if (components.protocol_end == url_components::omitted) { - ada_log("url_aggregator::validate omitted protocol_end \n", to_diagram()); - return false; - } - if (components.username_end == url_components::omitted) { - ada_log("url_aggregator::validate omitted username_end \n", to_diagram()); - return false; - } - if (components.host_start == url_components::omitted) { - ada_log("url_aggregator::validate omitted host_start \n", to_diagram()); - return false; - } - if (components.host_end == url_components::omitted) { - ada_log("url_aggregator::validate omitted host_end \n", to_diagram()); - return false; - } - if (components.pathname_start == url_components::omitted) { - ada_log("url_aggregator::validate omitted pathname_start \n", to_diagram()); - return false; - } - - if (components.protocol_end > buffer.size()) { - ada_log("url_aggregator::validate protocol_end overflow \n", to_diagram()); - return false; - } - if (components.username_end > buffer.size()) { - ada_log("url_aggregator::validate username_end overflow \n", to_diagram()); - return false; - } - if (components.host_start > buffer.size()) { - ada_log("url_aggregator::validate host_start overflow \n", to_diagram()); - return false; - } - if (components.host_end > buffer.size()) { - ada_log("url_aggregator::validate host_end overflow \n", to_diagram()); - return false; - } - if (components.pathname_start > buffer.size()) { - ada_log("url_aggregator::validate pathname_start overflow \n", - to_diagram()); - return false; - } - - if (components.protocol_end > 0) { - if (buffer[components.protocol_end - 1] != ':') { - ada_log( - "url_aggregator::validate missing : at the end of the protocol \n", - to_diagram()); - return false; - } - } - - if (components.username_end != buffer.size() && - components.username_end > components.protocol_end + 2) { - if (buffer[components.username_end] != ':' && - buffer[components.username_end] != '@') { - ada_log( - "url_aggregator::validate missing : or @ at the end of the username " - "\n", - to_diagram()); - return false; - } - } - - if (components.host_start != buffer.size()) { - if (components.host_start > components.username_end) { - if (buffer[components.host_start] != '@') { - ada_log( - "url_aggregator::validate missing @ at the end of the password \n", - to_diagram()); - return false; - } - } else if (components.host_start == components.username_end && - components.host_end > components.host_start) { - if (components.host_start == components.protocol_end + 2) { - if (buffer[components.protocol_end] != '/' || - buffer[components.protocol_end + 1] != '/') { - ada_log( - "url_aggregator::validate missing // between protocol and host " - "\n", - to_diagram()); - return false; - } - } else { - if (components.host_start > components.protocol_end && - buffer[components.host_start] != '@') { - ada_log( - "url_aggregator::validate missing @ at the end of the username " - "\n", - to_diagram()); - return false; - } - } - } else { - if (components.host_end != components.host_start) { - ada_log("url_aggregator::validate expected omitted host \n", - to_diagram()); - return false; - } - } - } - if (components.host_end != buffer.size() && - components.pathname_start > components.host_end) { - if (components.pathname_start == components.host_end + 2 && - buffer[components.host_end] == '/' && - buffer[components.host_end + 1] == '.') { - if (components.pathname_start + 1 >= buffer.size() || - buffer[components.pathname_start] != '/' || - buffer[components.pathname_start + 1] != '/') { - ada_log( - "url_aggregator::validate expected the path to begin with // \n", - to_diagram()); - return false; - } - } else if (buffer[components.host_end] != ':') { - ada_log("url_aggregator::validate missing : at the port \n", - to_diagram()); - return false; - } - } - if (components.pathname_start != buffer.size() && - components.pathname_start < components.search_start && - components.pathname_start < components.hash_start && !has_opaque_path) { - if (buffer[components.pathname_start] != '/') { - ada_log("url_aggregator::validate missing / at the path \n", - to_diagram()); - return false; - } - } - if (components.search_start != url_components::omitted) { - if (buffer[components.search_start] != '?') { - ada_log("url_aggregator::validate missing ? at the search \n", - to_diagram()); - return false; - } - } - if (components.hash_start != url_components::omitted) { - if (buffer[components.hash_start] != '#') { - ada_log("url_aggregator::validate missing # at the hash \n", - to_diagram()); - return false; - } - } - - return true; -} - void url_aggregator::delete_dash_dot() { ada_log("url_aggregator::delete_dash_dot"); ADA_ASSERT_TRUE(validate()); diff --git a/src/url_components.cpp b/src/url_components.cpp index f4cc9e8af..40508f4e0 100644 --- a/src/url_components.cpp +++ b/src/url_components.cpp @@ -7,81 +7,6 @@ namespace ada { -[[nodiscard]] constexpr bool url_components::check_offset_consistency() const noexcept { - /** - * https://user:pass@example.com:1234/foo/bar?baz#quux - * | | | | ^^^^| | | - * | | | | | | | `----- hash_start - * | | | | | | `--------- search_start - * | | | | | `----------------- pathname_start - * | | | | `--------------------- port - * | | | `----------------------- host_end - * | | `---------------------------------- host_start - * | `--------------------------------------- username_end - * `--------------------------------------------- protocol_end - */ - // These conditions can be made more strict. - uint32_t index = 0; - - if (protocol_end == url_components::omitted) { - return false; - } - if (protocol_end < index) { - return false; - } - index = protocol_end; - - if (username_end == url_components::omitted) { - return false; - } - if (username_end < index) { - return false; - } - index = username_end; - - if (host_start == url_components::omitted) { - return false; - } - if (host_start < index) { - return false; - } - index = host_start; - - if (port != url_components::omitted) { - if (port > 0xffff) { - return false; - } - uint32_t port_length = helpers::fast_digit_count(port) + 1; - if (index + port_length < index) { - return false; - } - index += port_length; - } - - if (pathname_start == url_components::omitted) { - return false; - } - if (pathname_start < index) { - return false; - } - index = pathname_start; - - if (search_start != url_components::omitted) { - if (search_start < index) { - return false; - } - index = search_start; - } - - if (hash_start != url_components::omitted) { - if (hash_start < index) { - return false; - } - } - - return true; -} - [[nodiscard]] std::string url_components::to_string() const { std::string answer; auto back = std::back_insert_iterator(answer);