Skip to content

Commit 9626f16

Browse files
committed
progress
1 parent f7ff34f commit 9626f16

14 files changed

+107
-802
lines changed

Cargo.lock

+9-15
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+5
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,11 @@ members = [
4848
"crates/ignore",
4949
]
5050

51+
[patch.crates-io]
52+
regex = { path = "/home/andrew/rust/regex" }
53+
regex-automata = { path = "/home/andrew/rust/regex/regex-automata" }
54+
regex-syntax = { path = "/home/andrew/rust/regex/regex-syntax" }
55+
5156
[dependencies]
5257
bstr = "1.6.0"
5358
grep = { version = "0.2.12", path = "crates/grep" }

crates/globset/Cargo.toml

+2-2
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,12 @@ log = { version = "0.4.20", optional = true }
2626
serde = { version = "1.0.188", optional = true }
2727

2828
[dependencies.regex-syntax]
29-
version = "0.7.5"
29+
version = "0.8.0"
3030
default-features = false
3131
features = ["std"]
3232

3333
[dependencies.regex-automata]
34-
version = "0.3.8"
34+
version = "0.4.0"
3535
default-features = false
3636
features = ["std", "perf", "syntax", "meta", "nfa", "hybrid"]
3737

crates/ignore/Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ same-file = "1.0.6"
2727
walkdir = "2.4.0"
2828

2929
[dependencies.regex-automata]
30-
version = "0.3.8"
30+
version = "0.4.0"
3131
default-features = false
3232
features = ["std", "perf", "syntax", "meta", "nfa", "hybrid", "dfa-onepass"]
3333

crates/regex/Cargo.toml

+2-2
Original file line number | Diff line number | Diff line change
@@ -17,5 +17,5 @@ edition = "2021"
1717
bstr = "1.6.2"
1818
grep-matcher = { version = "0.1.6", path = "../matcher" }
1919
log = "0.4.20"
20-
regex-automata = { version = "0.3.8" }
21-
regex-syntax = "0.7.5"
20+
regex-automata = { version = "0.4.0" }
21+
regex-syntax = "0.8.0"

crates/regex/src/ast.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -62,12 +62,12 @@ impl AstAnalysis {
6262
Ast::Flags(_)
6363
| Ast::Dot(_)
6464
| Ast::Assertion(_)
65-
| Ast::Class(ast::Class::Unicode(_))
66-
| Ast::Class(ast::Class::Perl(_)) => {}
65+
| Ast::ClassUnicode(_)
66+
| Ast::ClassPerl(_) => {}
6767
Ast::Literal(ref x) => {
6868
self.from_ast_literal(x);
6969
}
70-
Ast::Class(ast::Class::Bracketed(ref x)) => {
70+
Ast::ClassBracketed(ref x) => {
7171
self.from_ast_class_set(&x.kind);
7272
}
7373
Ast::Repetition(ref x) => {

crates/regex/src/config.rs

+13-78
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ use {
33
regex_automata::meta::Regex,
44
regex_syntax::{
55
ast,
6-
hir::{self, Hir, HirKind},
6+
hir::{self, Hir},
77
},
88
};
99

@@ -296,35 +296,6 @@ impl ConfiguredHIR {
296296
}
297297
}
298298

299-
/// Turns this configured HIR into one that only matches when both sides of
300-
/// the match correspond to a word boundary.
301-
///
302-
/// Note that the HIR returned is like turning `pat` into
303-
/// `(?m:^|\W)(pat)(?m:$|\W)`. That is, the true match is at capture group
304-
/// `1` and not `0`.
305-
pub(crate) fn into_word(self) -> Result<ConfiguredHIR, Error> {
306-
// In theory building the HIR for \W should never fail, but there are
307-
// likely some pathological cases (particularly with respect to certain
308-
// values of limits) where it could in theory fail.
309-
let non_word = {
310-
let mut config = self.config.clone();
311-
config.fixed_strings = false;
312-
ConfiguredHIR::new(config, &[r"\W"])?
313-
};
314-
let line_anchor_start = Hir::look(self.line_anchor_start());
315-
let line_anchor_end = Hir::look(self.line_anchor_end());
316-
let hir = Hir::concat(vec![
317-
Hir::alternation(vec![line_anchor_start, non_word.hir.clone()]),
318-
Hir::capture(hir::Capture {
319-
index: 1,
320-
name: None,
321-
sub: Box::new(renumber_capture_indices(self.hir)?),
322-
}),
323-
Hir::alternation(vec![non_word.hir, line_anchor_end]),
324-
]);
325-
Ok(ConfiguredHIR { config: self.config, hir })
326-
}
327-
328299
/// Turns this configured HIR into an equivalent one, but where it must
329300
/// match at the start and end of a line.
330301
pub(crate) fn into_whole_line(self) -> ConfiguredHIR {
@@ -336,12 +307,20 @@ impl ConfiguredHIR {
336307
}
337308

338309
/// Turns this configured HIR into an equivalent one, but where it must
339-
/// match at the start and end of the haystack.
340-
pub(crate) fn into_anchored(self) -> ConfiguredHIR {
310+
/// match at word boundaries.
311+
pub(crate) fn into_word(self) -> ConfiguredHIR {
341312
let hir = Hir::concat(vec![
342-
Hir::look(hir::Look::Start),
313+
Hir::look(if self.config.unicode {
314+
hir::Look::WordStartHalfUnicode
315+
} else {
316+
hir::Look::WordStartHalfAscii
317+
}),
343318
self.hir,
344-
Hir::look(hir::Look::End),
319+
Hir::look(if self.config.unicode {
320+
hir::Look::WordEndHalfUnicode
321+
} else {
322+
hir::Look::WordEndHalfAscii
323+
}),
345324
]);
346325
ConfiguredHIR { config: self.config, hir }
347326
}
@@ -365,50 +344,6 @@ impl ConfiguredHIR {
365344
}
366345
}
367346

368-
/// This increments the index of every capture group in the given hir by 1. If
369-
/// any increment results in an overflow, then an error is returned.
370-
fn renumber_capture_indices(hir: Hir) -> Result<Hir, Error> {
371-
Ok(match hir.into_kind() {
372-
HirKind::Empty => Hir::empty(),
373-
HirKind::Literal(hir::Literal(lit)) => Hir::literal(lit),
374-
HirKind::Class(cls) => Hir::class(cls),
375-
HirKind::Look(x) => Hir::look(x),
376-
HirKind::Repetition(mut x) => {
377-
x.sub = Box::new(renumber_capture_indices(*x.sub)?);
378-
Hir::repetition(x)
379-
}
380-
HirKind::Capture(mut cap) => {
381-
cap.index = match cap.index.checked_add(1) {
382-
Some(index) => index,
383-
None => {
384-
// This error message kind of sucks, but it's probably
385-
// impossible for it to happen. The only way a capture
386-
// index can overflow addition is if the regex is huge
387-
// (or something else has gone horribly wrong).
388-
let msg = "could not renumber capture index, too big";
389-
return Err(Error::any(msg));
390-
}
391-
};
392-
cap.sub = Box::new(renumber_capture_indices(*cap.sub)?);
393-
Hir::capture(cap)
394-
}
395-
HirKind::Concat(subs) => {
396-
let subs = subs
397-
.into_iter()
398-
.map(|sub| renumber_capture_indices(sub))
399-
.collect::<Result<Vec<Hir>, Error>>()?;
400-
Hir::concat(subs)
401-
}
402-
HirKind::Alternation(subs) => {
403-
let subs = subs
404-
.into_iter()
405-
.map(|sub| renumber_capture_indices(sub))
406-
.collect::<Result<Vec<Hir>, Error>>()?;
407-
Hir::alternation(subs)
408-
}
409-
})
410-
}
411-
412347
/// Returns true if the given literal string contains any byte from the line
413348
/// terminator given.
414349
fn has_line_terminator(lineterm: LineTerminator, literal: &str) -> bool {

crates/regex/src/error.rs

-4
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,6 @@ impl Error {
3030
Error { kind: ErrorKind::Regex(err.to_string()) }
3131
}
3232

33-
pub(crate) fn any<E: ToString>(msg: E) -> Error {
34-
Error { kind: ErrorKind::Regex(msg.to_string()) }
35-
}
36-
3733
/// Return the kind of this error.
3834
pub fn kind(&self) -> &ErrorKind {
3935
&self.kind

crates/regex/src/lib.rs

-1
Original file line number | Diff line number | Diff line change
@@ -15,4 +15,3 @@ mod literal;
1515
mod matcher;
1616
mod non_matching;
1717
mod strip;
18-
mod word;

0 commit comments

Comments
 (0)