Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

String::remove_matches O(n^2) -> O(n) #83515

Merged
merged 2 commits into from
Jun 8, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 39 additions & 22 deletions library/alloc/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ use core::fmt;
use core::hash;
#[cfg(not(no_global_oom_handling))]
use core::iter::FromIterator;
use core::iter::FusedIterator;
use core::iter::{from_fn, FusedIterator};
#[cfg(not(no_global_oom_handling))]
use core::ops::Add;
#[cfg(not(no_global_oom_handling))]
Expand Down Expand Up @@ -1290,32 +1290,49 @@ impl String {
{
use core::str::pattern::Searcher;

let matches = {
let rejections = {
let mut searcher = pat.into_searcher(self);
let mut matches = Vec::new();

while let Some(m) = searcher.next_match() {
matches.push(m);
}

matches
// Per Searcher::next:
//
// A Match result needs to contain the whole matched pattern,
// however Reject results may be split up into arbitrarily many
// adjacent fragments. Both ranges may have zero length.
//
// In practice the implementation of Searcher::next_match tends to
// be more efficient, so we use it here and do some work to invert
// matches into rejections since that's what we want to copy below.
let mut front = 0;
let rejections: Vec<_> = from_fn(|| {
let (start, end) = searcher.next_match()?;
let prev_front = front;
front = end;
Some((prev_front, start))
})
.collect();
rejections.into_iter().chain(core::iter::once((front, self.len())))
};

let len = self.len();
let mut shrunk_by = 0;
let mut len = 0;
let ptr = self.vec.as_mut_ptr();

for (start, end) in rejections {
let count = end - start;
if start != len {
// SAFETY: per Searcher::next:
//
// The stream of Match and Reject values up to a Done will
// contain index ranges that are adjacent, non-overlapping,
// covering the whole haystack, and lying on utf8
// boundaries.
unsafe {
ptr::copy(ptr.add(start), ptr.add(len), count);
}
}
len += count;
}

// SAFETY: start and end will be on utf8 byte boundaries per
// the Searcher docs
unsafe {
for (start, end) in matches {
ptr::copy(
self.vec.as_mut_ptr().add(end - shrunk_by),
self.vec.as_mut_ptr().add(start - shrunk_by),
len - end,
);
shrunk_by += end - start;
}
self.vec.set_len(len - shrunk_by);
self.vec.set_len(len);
}
}

Expand Down