Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

further reduce unnecessary string copies #888

Merged
merged 2 commits into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions include/ada/url_pattern-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
#include "ada/url_pattern_helpers.h"
#include "ada/url_pattern.h"

#include <algorithm>
#include <string_view>
#include <utility>

namespace ada {

Expand Down Expand Up @@ -330,36 +332,34 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
// Set username to url’s username.
username = url->get_username();
username = std::move(url->get_username());
// Set password to url’s password.
password = url->get_password();
password = std::move(url->get_password());
// Set hostname to url’s host, serialized, or the empty string if the value
// is null.
hostname = url->get_hostname();
hostname = std::move(url->get_hostname());
// Set port to url’s port, serialized, or the empty string if the value is
// null.
port = url->get_port();
port = std::move(url->get_port());
// Set pathname to the result of URL path serializing url.
pathname = url->get_pathname();
pathname = std::move(url->get_pathname());
// Set search to url’s query or the empty string if the value is null.
// IMPORTANT: Not documented on the URLPattern spec, but search prefix '?'
// is removed. Similar work was done on workerd:
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
if (url->has_search()) {
auto view = url->get_search();
search = view.starts_with("?") ? url->get_search().substr(1) : view;
} else {
search = "";
search =
view.starts_with("?") ? url->get_search().substr(1) : std::move(view);
}
// Set hash to url’s fragment or the empty string if the value is null.
// IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
// removed. Similar work was done on workerd:
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
if (url->has_hash()) {
auto view = url->get_hash();
hash = view.starts_with("#") ? url->get_hash().substr(1) : view;
} else {
hash = "";
hash =
view.starts_with("#") ? url->get_hash().substr(1) : std::move(view);
}
}

Expand Down
49 changes: 29 additions & 20 deletions include/ada/url_pattern_helpers-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,22 +44,22 @@ inline std::string to_string(token_type type) {
#endif // ADA_TESTING

template <url_pattern_regex::regex_concept regex_provider>
void constructor_string_parser<regex_provider>::rewind() {
constexpr void constructor_string_parser<regex_provider>::rewind() {
// Set parser’s token index to parser’s component start.
token_index = component_start;
// Set parser’s token increment to 0.
token_increment = 0;
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_hash_prefix() {
constexpr bool constructor_string_parser<regex_provider>::is_hash_prefix() {
// Return the result of running is a non-special pattern char given parser,
// parser’s token index and "#".
return is_non_special_pattern_char(token_index, '#');
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_search_prefix() {
constexpr bool constructor_string_parser<regex_provider>::is_search_prefix() {
// If result of running is a non-special pattern char given parser, parser’s
// token index and "?" is true, then return true.
if (is_non_special_pattern_char(token_index, '?')) {
Expand Down Expand Up @@ -92,7 +92,8 @@ bool constructor_string_parser<regex_provider>::is_search_prefix() {
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_non_special_pattern_char(
constexpr bool
constructor_string_parser<regex_provider>::is_non_special_pattern_char(
size_t index, uint32_t value) const {
// Let token be the result of running get a safe token given parser and index.
auto token = get_safe_token(index);
Expand All @@ -116,8 +117,8 @@ bool constructor_string_parser<regex_provider>::is_non_special_pattern_char(
}

template <url_pattern_regex::regex_concept regex_provider>
const token* constructor_string_parser<regex_provider>::get_safe_token(
size_t index) const {
constexpr const token*
constructor_string_parser<regex_provider>::get_safe_token(size_t index) const {
// If index is less than parser’s token list's size, then return parser’s
// token list[index].
if (index < token_list.size()) [[likely]] {
Expand All @@ -136,22 +137,24 @@ const token* constructor_string_parser<regex_provider>::get_safe_token(
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_group_open() const {
constexpr bool constructor_string_parser<regex_provider>::is_group_open()
const {
// If parser’s token list[parser’s token index]'s type is "open", then return
// true.
return token_list[token_index].type == token_type::OPEN;
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_group_close() const {
constexpr bool constructor_string_parser<regex_provider>::is_group_close()
const {
// If parser’s token list[parser’s token index]'s type is "close", then return
// true.
return token_list[token_index].type == token_type::CLOSE;
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::next_is_authority_slashes()
const {
constexpr bool
constructor_string_parser<regex_provider>::next_is_authority_slashes() const {
// If the result of running is a non-special pattern char given parser,
// parser’s token index + 1, and "/" is false, then return false.
if (!is_non_special_pattern_char(token_index + 1, '/')) {
Expand All @@ -166,7 +169,8 @@ bool constructor_string_parser<regex_provider>::next_is_authority_slashes()
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_protocol_suffix() const {
constexpr bool constructor_string_parser<regex_provider>::is_protocol_suffix()
const {
// Return the result of running is a non-special pattern char given parser,
// parser’s token index, and ":".
return is_non_special_pattern_char(token_index, ':');
Expand Down Expand Up @@ -293,49 +297,54 @@ std::string constructor_string_parser<regex_provider>::make_component_string() {
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_an_identity_terminator()
const {
constexpr bool
constructor_string_parser<regex_provider>::is_an_identity_terminator() const {
// Return the result of running is a non-special pattern char given parser,
// parser’s token index, and "@".
return is_non_special_pattern_char(token_index, '@');
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_pathname_start() const {
constexpr bool constructor_string_parser<regex_provider>::is_pathname_start()
const {
// Return the result of running is a non-special pattern char given parser,
// parser’s token index, and "/".
return is_non_special_pattern_char(token_index, '/');
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_password_prefix() const {
constexpr bool constructor_string_parser<regex_provider>::is_password_prefix()
const {
// Return the result of running is a non-special pattern char given parser,
// parser’s token index, and ":".
return is_non_special_pattern_char(token_index, ':');
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_an_ipv6_open() const {
constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_open()
const {
// Return the result of running is a non-special pattern char given parser,
// parser’s token index, and "[".
return is_non_special_pattern_char(token_index, '[');
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_an_ipv6_close() const {
constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_close()
const {
// Return the result of running is a non-special pattern char given parser,
// parser’s token index, and "]".
return is_non_special_pattern_char(token_index, ']');
}

template <url_pattern_regex::regex_concept regex_provider>
bool constructor_string_parser<regex_provider>::is_port_prefix() const {
constexpr bool constructor_string_parser<regex_provider>::is_port_prefix()
const {
// Return the result of running is a non-special pattern char given parser,
// parser’s token index, and ":".
return is_non_special_pattern_char(token_index, ':');
}

inline void Tokenizer::get_next_code_point() {
constexpr void Tokenizer::get_next_code_point() {
ada_log("Tokenizer::get_next_code_point called with index=", next_index);
ADA_ASSERT_TRUE(next_index < input.size());
// this assumes that we have a valid, non-truncated UTF-8 stream.
Expand Down Expand Up @@ -382,7 +391,7 @@ inline void Tokenizer::get_next_code_point() {
next_index += number_bytes;
}

inline void Tokenizer::seek_and_get_next_code_point(size_t new_index) {
constexpr void Tokenizer::seek_and_get_next_code_point(size_t new_index) {
ada_log("Tokenizer::seek_and_get_next_code_point called with new_index=",
new_index);
// Set tokenizer’s next index to index.
Expand Down
58 changes: 29 additions & 29 deletions include/ada/url_pattern_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,10 @@ class Tokenizer {
: input(new_input), policy(new_policy) {}

// @see https://urlpattern.spec.whatwg.org/#get-the-next-code-point
void get_next_code_point();
constexpr void get_next_code_point();

// @see https://urlpattern.spec.whatwg.org/#seek-and-get-the-next-code-point
void seek_and_get_next_code_point(size_t index);
constexpr void seek_and_get_next_code_point(size_t index);

// @see https://urlpattern.spec.whatwg.org/#add-a-token

Expand Down Expand Up @@ -155,16 +155,6 @@ struct constructor_string_parser {
explicit constructor_string_parser(std::string_view new_input,
std::vector<token>&& new_token_list)
: input(new_input), token_list(std::move(new_token_list)) {}

// @see https://urlpattern.spec.whatwg.org/#rewind
void rewind();

// @see https://urlpattern.spec.whatwg.org/#is-a-hash-prefix
bool is_hash_prefix();

// @see https://urlpattern.spec.whatwg.org/#is-a-search-prefix
bool is_search_prefix();

// @see https://urlpattern.spec.whatwg.org/#parse-a-constructor-string
static tl::expected<url_pattern_init, errors> parse(std::string_view input);

Expand All @@ -183,49 +173,59 @@ struct constructor_string_parser {
DONE,
};

// @see
// https://urlpattern.spec.whatwg.org/#compute-protocol-matches-a-special-scheme-flag
std::optional<errors> compute_protocol_matches_special_scheme_flag();

private:
// @see https://urlpattern.spec.whatwg.org/#rewind
constexpr void rewind();

// @see https://urlpattern.spec.whatwg.org/#is-a-hash-prefix
constexpr bool is_hash_prefix();

// @see https://urlpattern.spec.whatwg.org/#is-a-search-prefix
constexpr bool is_search_prefix();

// @see https://urlpattern.spec.whatwg.org/#change-state
void change_state(State state, size_t skip);

// @see https://urlpattern.spec.whatwg.org/#is-a-group-open
bool is_group_open() const;
constexpr bool is_group_open() const;

// @see https://urlpattern.spec.whatwg.org/#is-a-group-close
bool is_group_close() const;
constexpr bool is_group_close() const;

// @see https://urlpattern.spec.whatwg.org/#is-a-protocol-suffix
bool is_protocol_suffix() const;

// @see
// https://urlpattern.spec.whatwg.org/#compute-protocol-matches-a-special-scheme-flag
std::optional<errors> compute_protocol_matches_special_scheme_flag();
constexpr bool is_protocol_suffix() const;

// @see https://urlpattern.spec.whatwg.org/#next-is-authority-slashes
bool next_is_authority_slashes() const;
constexpr bool next_is_authority_slashes() const;

// @see https://urlpattern.spec.whatwg.org/#is-an-identity-terminator
bool is_an_identity_terminator() const;
constexpr bool is_an_identity_terminator() const;

// @see https://urlpattern.spec.whatwg.org/#is-a-pathname-start
bool is_pathname_start() const;
constexpr bool is_pathname_start() const;

// @see https://urlpattern.spec.whatwg.org/#is-a-password-prefix
bool is_password_prefix() const;
constexpr bool is_password_prefix() const;

// @see https://urlpattern.spec.whatwg.org/#is-an-ipv6-open
bool is_an_ipv6_open() const;
constexpr bool is_an_ipv6_open() const;

// @see https://urlpattern.spec.whatwg.org/#is-an-ipv6-close
bool is_an_ipv6_close() const;
constexpr bool is_an_ipv6_close() const;

// @see https://urlpattern.spec.whatwg.org/#is-a-port-prefix
bool is_port_prefix() const;
constexpr bool is_port_prefix() const;

private:
// @see https://urlpattern.spec.whatwg.org/#is-a-non-special-pattern-char
bool is_non_special_pattern_char(size_t index, uint32_t value) const;
constexpr bool is_non_special_pattern_char(size_t index,
uint32_t value) const;

// @see https://urlpattern.spec.whatwg.org/#get-a-safe-token
const token* get_safe_token(size_t index) const;
constexpr const token* get_safe_token(size_t index) const;

// @see https://urlpattern.spec.whatwg.org/#make-a-component-string
std::string make_component_string();
Expand Down
5 changes: 2 additions & 3 deletions src/url_pattern_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ generate_regular_expression_and_name_list(

// Let name list be a new list
std::vector<std::string> name_list{};
const std::string full_wildcard_regexp_value = ".*";

// For each part of part list:
for (const url_pattern_part& part : part_list) {
Expand Down Expand Up @@ -61,7 +60,7 @@ generate_regular_expression_and_name_list(
// Otherwise if part's type is "full-wildcard"
else if (part.type == url_pattern_part_type::FULL_WILDCARD) {
// then set regexp value to full wildcard regexp value.
regexp_value = full_wildcard_regexp_value;
regexp_value = ".*";
}

// If part's prefix is the empty string and part's suffix is the empty
Expand Down Expand Up @@ -140,7 +139,7 @@ generate_regular_expression_and_name_list(
result += "$";

// Return (result, name list)
return {result, name_list};
return {std::move(result), std::move(name_list)};
}

bool is_ipv6_address(std::string_view input) noexcept {
Expand Down
Loading