From 385e93da98ee3d804d11d07d99d240b7d84bdd1e Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Mon, 17 Feb 2025 12:13:14 -0500 Subject: [PATCH 1/2] further reduce unnecessary string copies --- include/ada/url_pattern-inl.h | 22 +++++++++++----------- src/url_pattern_helpers.cpp | 5 ++--- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/include/ada/url_pattern-inl.h b/include/ada/url_pattern-inl.h index a4912243e..c9e04fe1e 100644 --- a/include/ada/url_pattern-inl.h +++ b/include/ada/url_pattern-inl.h @@ -9,7 +9,9 @@ #include "ada/url_pattern_helpers.h" #include "ada/url_pattern.h" +#include #include +#include namespace ada { @@ -330,26 +332,25 @@ result> url_pattern::match( // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038 protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1); // Set username to url’s username. - username = url->get_username(); + username = std::move(url->get_username()); // Set password to url’s password. - password = url->get_password(); + password = std::move(url->get_password()); // Set hostname to url’s host, serialized, or the empty string if the value // is null. - hostname = url->get_hostname(); + hostname = std::move(url->get_hostname()); // Set port to url’s port, serialized, or the empty string if the value is // null. - port = url->get_port(); + port = std::move(url->get_port()); // Set pathname to the result of URL path serializing url. - pathname = url->get_pathname(); + pathname = std::move(url->get_pathname()); // Set search to url’s query or the empty string if the value is null. // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?' // is removed. Similar work was done on workerd: // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232 if (url->has_search()) { auto view = url->get_search(); - search = view.starts_with("?") ? url->get_search().substr(1) : view; - } else { - search = ""; + search = + view.starts_with("?") ? url->get_search().substr(1) : std::move(view); } // Set hash to url’s fragment or the empty string if the value is null. // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is @@ -357,9 +358,8 @@ result> url_pattern::match( // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242 if (url->has_hash()) { auto view = url->get_hash(); - hash = view.starts_with("#") ? url->get_hash().substr(1) : view; - } else { - hash = ""; + hash = + view.starts_with("#") ? url->get_hash().substr(1) : std::move(view); } } diff --git a/src/url_pattern_helpers.cpp b/src/url_pattern_helpers.cpp index 49093df0c..53dcdfa68 100644 --- a/src/url_pattern_helpers.cpp +++ b/src/url_pattern_helpers.cpp @@ -15,7 +15,6 @@ generate_regular_expression_and_name_list( // Let name list be a new list std::vector name_list{}; - const std::string full_wildcard_regexp_value = ".*"; // For each part of part list: for (const url_pattern_part& part : part_list) { @@ -61,7 +60,7 @@ generate_regular_expression_and_name_list( // Otherwise if part's type is "full-wildcard" else if (part.type == url_pattern_part_type::FULL_WILDCARD) { // then set regexp value to full wildcard regexp value. - regexp_value = full_wildcard_regexp_value; + regexp_value = ".*"; } // If part's prefix is the empty string and part's suffix is the empty @@ -140,7 +139,7 @@ generate_regular_expression_and_name_list( result += "$"; // Return (result, name list) - return {result, name_list}; + return {std::move(result), std::move(name_list)}; } bool is_ipv6_address(std::string_view input) noexcept { From 5c465bd021cb58676c49708ac4cf30e909546ccc Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Mon, 17 Feb 2025 12:21:14 -0500 Subject: [PATCH 2/2] make more functions private and constexpr --- include/ada/url_pattern_helpers-inl.h | 49 +++++++++++++--------- include/ada/url_pattern_helpers.h | 58 +++++++++++++-------------- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/include/ada/url_pattern_helpers-inl.h b/include/ada/url_pattern_helpers-inl.h index 46a6ce9ed..72b494a1c 100644 --- a/include/ada/url_pattern_helpers-inl.h +++ b/include/ada/url_pattern_helpers-inl.h @@ -44,7 +44,7 @@ inline std::string to_string(token_type type) { #endif // ADA_TESTING template -void constructor_string_parser::rewind() { +constexpr void constructor_string_parser::rewind() { // Set parser’s token index to parser’s component start. token_index = component_start; // Set parser’s token increment to 0. @@ -52,14 +52,14 @@ void constructor_string_parser::rewind() { } template -bool constructor_string_parser::is_hash_prefix() { +constexpr bool constructor_string_parser::is_hash_prefix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index and "#". return is_non_special_pattern_char(token_index, '#'); } template -bool constructor_string_parser::is_search_prefix() { +constexpr bool constructor_string_parser::is_search_prefix() { // If result of running is a non-special pattern char given parser, parser’s // token index and "?" is true, then return true. if (is_non_special_pattern_char(token_index, '?')) { @@ -92,7 +92,8 @@ bool constructor_string_parser::is_search_prefix() { } template -bool constructor_string_parser::is_non_special_pattern_char( +constexpr bool +constructor_string_parser::is_non_special_pattern_char( size_t index, uint32_t value) const { // Let token be the result of running get a safe token given parser and index. auto token = get_safe_token(index); @@ -116,8 +117,8 @@ bool constructor_string_parser::is_non_special_pattern_char( } template -const token* constructor_string_parser::get_safe_token( - size_t index) const { +constexpr const token* +constructor_string_parser::get_safe_token(size_t index) const { // If index is less than parser’s token list's size, then return parser’s // token list[index]. if (index < token_list.size()) [[likely]] { @@ -136,22 +137,24 @@ const token* constructor_string_parser::get_safe_token( } template -bool constructor_string_parser::is_group_open() const { +constexpr bool constructor_string_parser::is_group_open() + const { // If parser’s token list[parser’s token index]'s type is "open", then return // true. return token_list[token_index].type == token_type::OPEN; } template -bool constructor_string_parser::is_group_close() const { +constexpr bool constructor_string_parser::is_group_close() + const { // If parser’s token list[parser’s token index]'s type is "close", then return // true. return token_list[token_index].type == token_type::CLOSE; } template -bool constructor_string_parser::next_is_authority_slashes() - const { +constexpr bool +constructor_string_parser::next_is_authority_slashes() const { // If the result of running is a non-special pattern char given parser, // parser’s token index + 1, and "/" is false, then return false. if (!is_non_special_pattern_char(token_index + 1, '/')) { @@ -166,7 +169,8 @@ bool constructor_string_parser::next_is_authority_slashes() } template -bool constructor_string_parser::is_protocol_suffix() const { +constexpr bool constructor_string_parser::is_protocol_suffix() + const { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". return is_non_special_pattern_char(token_index, ':'); @@ -293,49 +297,54 @@ std::string constructor_string_parser::make_component_string() { } template -bool constructor_string_parser::is_an_identity_terminator() - const { +constexpr bool +constructor_string_parser::is_an_identity_terminator() const { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "@". return is_non_special_pattern_char(token_index, '@'); } template -bool constructor_string_parser::is_pathname_start() const { +constexpr bool constructor_string_parser::is_pathname_start() + const { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "/". return is_non_special_pattern_char(token_index, '/'); } template -bool constructor_string_parser::is_password_prefix() const { +constexpr bool constructor_string_parser::is_password_prefix() + const { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". return is_non_special_pattern_char(token_index, ':'); } template -bool constructor_string_parser::is_an_ipv6_open() const { +constexpr bool constructor_string_parser::is_an_ipv6_open() + const { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "[". return is_non_special_pattern_char(token_index, '['); } template -bool constructor_string_parser::is_an_ipv6_close() const { +constexpr bool constructor_string_parser::is_an_ipv6_close() + const { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "]". return is_non_special_pattern_char(token_index, ']'); } template -bool constructor_string_parser::is_port_prefix() const { +constexpr bool constructor_string_parser::is_port_prefix() + const { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". return is_non_special_pattern_char(token_index, ':'); } -inline void Tokenizer::get_next_code_point() { +constexpr void Tokenizer::get_next_code_point() { ada_log("Tokenizer::get_next_code_point called with index=", next_index); ADA_ASSERT_TRUE(next_index < input.size()); // this assumes that we have a valid, non-truncated UTF-8 stream. @@ -382,7 +391,7 @@ inline void Tokenizer::get_next_code_point() { next_index += number_bytes; } -inline void Tokenizer::seek_and_get_next_code_point(size_t new_index) { +constexpr void Tokenizer::seek_and_get_next_code_point(size_t new_index) { ada_log("Tokenizer::seek_and_get_next_code_point called with new_index=", new_index); // Set tokenizer’s next index to index. diff --git a/include/ada/url_pattern_helpers.h b/include/ada/url_pattern_helpers.h index a8fdd8078..b485fe7c4 100644 --- a/include/ada/url_pattern_helpers.h +++ b/include/ada/url_pattern_helpers.h @@ -109,10 +109,10 @@ class Tokenizer { : input(new_input), policy(new_policy) {} // @see https://urlpattern.spec.whatwg.org/#get-the-next-code-point - void get_next_code_point(); + constexpr void get_next_code_point(); // @see https://urlpattern.spec.whatwg.org/#seek-and-get-the-next-code-point - void seek_and_get_next_code_point(size_t index); + constexpr void seek_and_get_next_code_point(size_t index); // @see https://urlpattern.spec.whatwg.org/#add-a-token @@ -155,16 +155,6 @@ struct constructor_string_parser { explicit constructor_string_parser(std::string_view new_input, std::vector&& new_token_list) : input(new_input), token_list(std::move(new_token_list)) {} - - // @see https://urlpattern.spec.whatwg.org/#rewind - void rewind(); - - // @see https://urlpattern.spec.whatwg.org/#is-a-hash-prefix - bool is_hash_prefix(); - - // @see https://urlpattern.spec.whatwg.org/#is-a-search-prefix - bool is_search_prefix(); - // @see https://urlpattern.spec.whatwg.org/#parse-a-constructor-string static tl::expected parse(std::string_view input); @@ -183,49 +173,59 @@ struct constructor_string_parser { DONE, }; + // @see + // https://urlpattern.spec.whatwg.org/#compute-protocol-matches-a-special-scheme-flag + std::optional compute_protocol_matches_special_scheme_flag(); + + private: + // @see https://urlpattern.spec.whatwg.org/#rewind + constexpr void rewind(); + + // @see https://urlpattern.spec.whatwg.org/#is-a-hash-prefix + constexpr bool is_hash_prefix(); + + // @see https://urlpattern.spec.whatwg.org/#is-a-search-prefix + constexpr bool is_search_prefix(); + // @see https://urlpattern.spec.whatwg.org/#change-state void change_state(State state, size_t skip); // @see https://urlpattern.spec.whatwg.org/#is-a-group-open - bool is_group_open() const; + constexpr bool is_group_open() const; // @see https://urlpattern.spec.whatwg.org/#is-a-group-close - bool is_group_close() const; + constexpr bool is_group_close() const; // @see https://urlpattern.spec.whatwg.org/#is-a-protocol-suffix - bool is_protocol_suffix() const; - - // @see - // https://urlpattern.spec.whatwg.org/#compute-protocol-matches-a-special-scheme-flag - std::optional compute_protocol_matches_special_scheme_flag(); + constexpr bool is_protocol_suffix() const; // @see https://urlpattern.spec.whatwg.org/#next-is-authority-slashes - bool next_is_authority_slashes() const; + constexpr bool next_is_authority_slashes() const; // @see https://urlpattern.spec.whatwg.org/#is-an-identity-terminator - bool is_an_identity_terminator() const; + constexpr bool is_an_identity_terminator() const; // @see https://urlpattern.spec.whatwg.org/#is-a-pathname-start - bool is_pathname_start() const; + constexpr bool is_pathname_start() const; // @see https://urlpattern.spec.whatwg.org/#is-a-password-prefix - bool is_password_prefix() const; + constexpr bool is_password_prefix() const; // @see https://urlpattern.spec.whatwg.org/#is-an-ipv6-open - bool is_an_ipv6_open() const; + constexpr bool is_an_ipv6_open() const; // @see https://urlpattern.spec.whatwg.org/#is-an-ipv6-close - bool is_an_ipv6_close() const; + constexpr bool is_an_ipv6_close() const; // @see https://urlpattern.spec.whatwg.org/#is-a-port-prefix - bool is_port_prefix() const; + constexpr bool is_port_prefix() const; - private: // @see https://urlpattern.spec.whatwg.org/#is-a-non-special-pattern-char - bool is_non_special_pattern_char(size_t index, uint32_t value) const; + constexpr bool is_non_special_pattern_char(size_t index, + uint32_t value) const; // @see https://urlpattern.spec.whatwg.org/#get-a-safe-token - const token* get_safe_token(size_t index) const; + constexpr const token* get_safe_token(size_t index) const; // @see https://urlpattern.spec.whatwg.org/#make-a-component-string std::string make_component_string();