Skip to content

Commit

Permalink
moving functions around
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Sep 18, 2024
1 parent 916b8d4 commit eb54e04
Show file tree
Hide file tree
Showing 10 changed files with 287 additions and 270 deletions.
1 change: 1 addition & 0 deletions include/ada.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "ada/url_base-inl.h"
#include "ada/url-inl.h"
#include "ada/url_components.h"
#include "ada/url_components-inl.h"
#include "ada/url_aggregator.h"
#include "ada/url_aggregator-inl.h"
#include "ada/url_search_params.h"
Expand Down
4 changes: 4 additions & 0 deletions include/ada/url-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u) {
return path.size();
}

[[nodiscard]] constexpr std::string_view url::get_pathname() const noexcept {
return path;
}

[[nodiscard]] ada_really_inline ada::url_components url::get_components()
const noexcept {
url_components out{};
Expand Down
192 changes: 189 additions & 3 deletions include/ada/url_aggregator-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,8 @@ url_aggregator::get_components() const noexcept {
return components;
}

[[nodiscard]] constexpr bool ada::url_aggregator::has_authority() const noexcept {
[[nodiscard]] constexpr bool ada::url_aggregator::has_authority()
const noexcept {
ada_log("url_aggregator::has_authority");
// Performance: instead of doing this potentially expensive check, we could
// have a boolean in the struct.
Expand Down Expand Up @@ -844,8 +845,8 @@ constexpr bool url_aggregator::has_port() const noexcept {
buffer[components.host_end + 1] == '.';
}

[[nodiscard]] constexpr std::string_view url_aggregator::get_href() const noexcept
ada_lifetime_bound {
/**
 * Returns a non-owning view covering the whole internal `buffer`. The call is
 * logged through ada_log before the view is produced.
 */
[[nodiscard]] constexpr std::string_view url_aggregator::get_href()
    const noexcept ada_lifetime_bound {
  ada_log("url_aggregator::get_href");
  return std::string_view{buffer};
}
Expand Down Expand Up @@ -919,6 +920,191 @@ constexpr void url_aggregator::set_protocol_as_file() {
ADA_ASSERT_TRUE(validate());
}

/**
 * Checks that `components` is coherent with the content of `buffer`.
 *
 * The checks run in this order:
 *  1. If `is_valid` is false there is nothing to check and true is returned.
 *  2. The offsets must be mutually consistent
 *     (url_components::check_offset_consistency()).
 *  3. The mandatory offsets must not be omitted and must not exceed the
 *     buffer size.
 *  4. The delimiter expected at each offset (':', '@', "//", '/', '?', '#')
 *     must actually be present in the buffer.
 *
 * Each failure is reported through ada_log together with to_diagram().
 *
 * @return true if no inconsistency was detected (or if `is_valid` is false),
 *         false otherwise.
 */
[[nodiscard]] constexpr bool url_aggregator::validate() const noexcept {
  if (!is_valid) {
    return true;
  }
  if (!components.check_offset_consistency()) {
    ada_log("url_aggregator::validate inconsistent components \n",
            to_diagram());
    return false;
  }
  // We have a credible components struct, but let us investigate more
  // carefully:
  /**
   * https://user:pass@example.com:1234/foo/bar?baz#quux
   *       |     |    |          | ^^^^|       |   |
   *       |     |    |          | |   |       |   `----- hash_start
   *       |     |    |          | |   |       `--------- search_start
   *       |     |    |          | |   `----------------- pathname_start
   *       |     |    |          | `--------------------- port
   *       |     |    |          `----------------------- host_end
   *       |     |    `---------------------------------- host_start
   *       |     `--------------------------------------- username_end
   *       `--------------------------------------------- protocol_end
   */
  if (components.protocol_end == url_components::omitted) {
    ada_log("url_aggregator::validate omitted protocol_end \n", to_diagram());
    return false;
  }
  if (components.username_end == url_components::omitted) {
    ada_log("url_aggregator::validate omitted username_end \n", to_diagram());
    return false;
  }
  if (components.host_start == url_components::omitted) {
    ada_log("url_aggregator::validate omitted host_start \n", to_diagram());
    return false;
  }
  if (components.host_end == url_components::omitted) {
    ada_log("url_aggregator::validate omitted host_end \n", to_diagram());
    return false;
  }
  if (components.pathname_start == url_components::omitted) {
    ada_log("url_aggregator::validate omitted pathname_start \n", to_diagram());
    return false;
  }

  if (components.protocol_end > buffer.size()) {
    ada_log("url_aggregator::validate protocol_end overflow \n", to_diagram());
    return false;
  }
  if (components.username_end > buffer.size()) {
    ada_log("url_aggregator::validate username_end overflow \n", to_diagram());
    return false;
  }
  if (components.host_start > buffer.size()) {
    ada_log("url_aggregator::validate host_start overflow \n", to_diagram());
    return false;
  }
  if (components.host_end > buffer.size()) {
    ada_log("url_aggregator::validate host_end overflow \n", to_diagram());
    return false;
  }
  if (components.pathname_start > buffer.size()) {
    ada_log("url_aggregator::validate pathname_start overflow \n",
            to_diagram());
    return false;
  }

  if (components.protocol_end > 0) {
    if (buffer[components.protocol_end - 1] != ':') {
      ada_log(
          "url_aggregator::validate missing : at the end of the protocol \n",
          to_diagram());
      return false;
    }
  }

  if (components.username_end != buffer.size() &&
      components.username_end > components.protocol_end + 2) {
    if (buffer[components.username_end] != ':' &&
        buffer[components.username_end] != '@') {
      ada_log(
          "url_aggregator::validate missing : or @ at the end of the username "
          "\n",
          to_diagram());
      return false;
    }
  }

  if (components.host_start != buffer.size()) {
    if (components.host_start > components.username_end) {
      if (buffer[components.host_start] != '@') {
        ada_log(
            "url_aggregator::validate missing @ at the end of the password \n",
            to_diagram());
        return false;
      }
    } else if (components.host_start == components.username_end &&
               components.host_end > components.host_start) {
      if (components.host_start == components.protocol_end + 2) {
        if (buffer[components.protocol_end] != '/' ||
            buffer[components.protocol_end + 1] != '/') {
          ada_log(
              "url_aggregator::validate missing // between protocol and host "
              "\n",
              to_diagram());
          return false;
        }
      } else {
        if (components.host_start > components.protocol_end &&
            buffer[components.host_start] != '@') {
          ada_log(
              "url_aggregator::validate missing @ at the end of the username "
              "\n",
              to_diagram());
          return false;
        }
      }
    } else {
      if (components.host_end != components.host_start) {
        ada_log("url_aggregator::validate expected omitted host \n",
                to_diagram());
        return false;
      }
    }
  }
  if (components.host_end != buffer.size() &&
      components.pathname_start > components.host_end) {
    if (components.pathname_start == components.host_end + 2 &&
        buffer[components.host_end] == '/' &&
        buffer[components.host_end + 1] == '.') {
      if (components.pathname_start + 1 >= buffer.size() ||
          buffer[components.pathname_start] != '/' ||
          buffer[components.pathname_start + 1] != '/') {
        ada_log(
            "url_aggregator::validate expected the path to begin with // \n",
            to_diagram());
        return false;
      }
    } else if (buffer[components.host_end] != ':') {
      ada_log("url_aggregator::validate missing : at the port \n",
              to_diagram());
      return false;
    }
  }
  if (components.pathname_start != buffer.size() &&
      components.pathname_start < components.search_start &&
      components.pathname_start < components.hash_start && !has_opaque_path) {
    if (buffer[components.pathname_start] != '/') {
      ada_log("url_aggregator::validate missing / at the path \n",
              to_diagram());
      return false;
    }
  }
  if (components.search_start != url_components::omitted) {
    // The offset must be checked before it is used as an index: the
    // consistency check above only establishes ordering, not bounds.
    if (components.search_start > buffer.size()) {
      ada_log("url_aggregator::validate search_start overflow \n",
              to_diagram());
      return false;
    }
    if (buffer[components.search_start] != '?') {
      ada_log("url_aggregator::validate missing ? at the search \n",
              to_diagram());
      return false;
    }
  }
  if (components.hash_start != url_components::omitted) {
    // Same bounds guard as for search_start.
    if (components.hash_start > buffer.size()) {
      ada_log("url_aggregator::validate hash_start overflow \n", to_diagram());
      return false;
    }
    if (buffer[components.hash_start] != '#') {
      ada_log("url_aggregator::validate missing # at the hash \n",
              to_diagram());
      return false;
    }
  }

  return true;
}

/**
 * Returns the slice of `buffer` that starts at components.pathname_start.
 * The slice stops at components.search_start when a search component is
 * present, otherwise at components.hash_start when a hash component is
 * present, otherwise at the end of the buffer.
 */
[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname()
    const noexcept ada_lifetime_bound {
  ada_log("url_aggregator::get_pathname pathname_start = ",
          components.pathname_start, " buffer.size() = ", buffer.size(),
          " components.search_start = ", components.search_start,
          " components.hash_start = ", components.hash_start);
  const bool has_search = components.search_start != url_components::omitted;
  const bool has_hash = components.hash_start != url_components::omitted;
  // The search component takes precedence over the hash component.
  const uint32_t pathname_end =
      has_search ? components.search_start
                 : (has_hash ? components.hash_start
                             : uint32_t(buffer.size()));
  return helpers::substring(buffer, components.pathname_start, pathname_end);
}

inline std::ostream &operator<<(std::ostream &out,
const ada::url_aggregator &u) {
return out << u.to_string();
Expand Down
3 changes: 2 additions & 1 deletion include/ada/url_aggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ struct url_aggregator : url_base {
* A URL includes credentials if its username or password is not the empty
* string.
*/
[[nodiscard]] ada_really_inline constexpr bool has_credentials() const noexcept;
[[nodiscard]] ada_really_inline constexpr bool has_credentials()
const noexcept;

/**
* Useful for implementing efficient serialization for the URL.
Expand Down
3 changes: 2 additions & 1 deletion include/ada/url_base-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

namespace ada {

[[nodiscard]] ada_really_inline constexpr bool url_base::is_special() const noexcept {
/**
 * Reports whether the scheme type of this URL is anything other than
 * ada::scheme::NOT_SPECIAL.
 */
[[nodiscard]] ada_really_inline constexpr bool url_base::is_special()
    const noexcept {
  const bool not_special = (type == ada::scheme::NOT_SPECIAL);
  return !not_special;
}

Expand Down
88 changes: 88 additions & 0 deletions include/ada/url_components-inl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/**
* @file url_components-inl.h
* @brief Inline functions for the URL Components
*/
#ifndef ADA_URL_COMPONENTS_INL_H
#define ADA_URL_COMPONENTS_INL_H

#include "ada/url_components.h"

namespace ada {

[[nodiscard]] constexpr bool url_components::check_offset_consistency() const noexcept {
/**
* https://user:pass@example.com:1234/foo/bar?baz#quux
* | | | | ^^^^| | |
* | | | | | | | `----- hash_start
* | | | | | | `--------- search_start
* | | | | | `----------------- pathname_start
* | | | | `--------------------- port
* | | | `----------------------- host_end
* | | `---------------------------------- host_start
* | `--------------------------------------- username_end
* `--------------------------------------------- protocol_end
*/
// These conditions can be made more strict.
uint32_t index = 0;

if (protocol_end == url_components::omitted) {
return false;
}
if (protocol_end < index) {
return false;
}
index = protocol_end;

if (username_end == url_components::omitted) {
return false;
}
if (username_end < index) {
return false;
}
index = username_end;

if (host_start == url_components::omitted) {
return false;
}
if (host_start < index) {
return false;
}
index = host_start;

if (port != url_components::omitted) {
if (port > 0xffff) {
return false;
}
uint32_t port_length = helpers::fast_digit_count(port) + 1;
if (index + port_length < index) {
return false;
}
index += port_length;
}

if (pathname_start == url_components::omitted) {
return false;
}
if (pathname_start < index) {
return false;
}
index = pathname_start;

if (search_start != url_components::omitted) {
if (search_start < index) {
return false;
}
index = search_start;
}

if (hash_start != url_components::omitted) {
if (hash_start < index) {
return false;
}
}

return true;
}

} // namespace ada
#endif
1 change: 0 additions & 1 deletion include/ada/url_components.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,5 @@ struct url_components {
[[nodiscard]] std::string to_string() const;

}; // struct url_components

} // namespace ada
#endif
4 changes: 0 additions & 4 deletions src/url-getters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ namespace ada {
return host.value_or("");
}

[[nodiscard]] constexpr std::string_view url::get_pathname() const noexcept {
return path;
}

[[nodiscard]] std::string url::get_search() const noexcept {
// If this's URL's query is either null or the empty string, then return the
// empty string. Return U+003F (?), followed by this's URL's query.
Expand Down
Loading

0 comments on commit eb54e04

Please sign in to comment.