Skip to content

Commit

Permalink
Conditionally parse named capture groups (#83)
Browse files Browse the repository at this point in the history
  • Loading branch information
raskad authored Jan 28, 2024
1 parent 77f02b3 commit 649478b
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 20 deletions.
13 changes: 12 additions & 1 deletion src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,8 @@ where
}
}

'k' => {
// [+NamedCaptureGroups] k GroupName
'k' if self.flags.unicode || !self.named_group_indices.is_empty() => {
self.consume('k');

// The sequence `\k` must be the start of a backreference to a named capture group.
Expand All @@ -967,6 +968,16 @@ where
error("Unexpected end of named backreference")
}
}

// [~NamedCaptureGroups] k GroupName
'k' => {
self.consume('k');
Ok(ir::Node::Char {
c: self.fold_if_icase(c),
icase: self.flags.icase,
})
}

_ => {
let c = self.consume_character_escape()?;
Ok(ir::Node::Char {
Expand Down
52 changes: 33 additions & 19 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1205,28 +1205,42 @@ fn run_regexp_named_capture_groups_tc(tc: TestConfig) {
}

#[test]
#[rustfmt::skip]
fn run_regexp_named_groups_unicode_malformed_tc() {
// Test entry point for the malformed named-group cases: hands
// `run_regexp_named_groups_unicode_malformed_tc` to `test_with_configs`.
// NOTE(review): presumably `test_with_configs` invokes the callback once per
// `TestConfig` variant — confirm against its definition.
fn run_regexp_named_groups_unicode_malformed() {
test_with_configs(run_regexp_named_groups_unicode_malformed_tc)
}

fn run_regexp_named_groups_unicode_malformed_tc(tc: TestConfig) {
// From 262 test/annexB/built-ins/RegExp/named-groups/non-unicode-malformed-lookbehind.js
test_parse_fails(r#"\k<a>(?<=>)a"#);
test_parse_fails(r#"(?<=>)\k<a>"#);
test_parse_fails(r#"\k<a>(?<!a)a"#);
test_parse_fails(r#"(?<!a>)\k<a>"#);
tc.compile(r#"\k<a>(?<=>)a"#).test_succeeds(r#"k<a>a"#);
tc.compile(r#"(?<=>)\k<a>"#).test_succeeds(r#">k<a>"#);
tc.compile(r#"\k<a>(?<!a)a"#).test_succeeds(r#"k<a>a"#);
tc.compile(r#"(?<!a>)\k<a>"#).test_succeeds(r#"k<a>"#);

// Negative parse tests in unicode mode.
test_parse_fails_flags(r#"\k<a>(?<=>)a"#, "u");
test_parse_fails_flags(r#"(?<=>)\k<a>"#, "u");
test_parse_fails_flags(r#"\k<a>(?<!a)a"#, "u");
test_parse_fails_flags(r#"(?<!a>)\k<a>"#, "u");

// From 262 test/annexB/built-ins/RegExp/named-groups/non-unicode-malformed.js
test_parse_fails(r#"\k<a>"#);
test_parse_fails(r#"\k<4>"#);
test_parse_fails(r#"\k<a"#);
test_parse_fails(r#"\k"#);

// TODO: This test fails because we accept alphabetic ASCII characters in otherwise-invalid escapes, due to the PCRE tests.
//test_parse_fails(r#"(?<a>\a)"#);

test_parse_fails(r#"\k<a>"#);
test_parse_fails(r#"\k<a"#);
test_parse_fails(r#"\k<a>(<a>x)"#);
test_parse_fails(r#"\k<a>\1"#);
test_parse_fails(r#"\1(b)\k<a>"#);
tc.compile(r#"\k<a>"#).test_succeeds(r#"k<a>"#);
tc.compile(r#"\k<4>"#).test_succeeds(r#"k<4>"#);
tc.compile(r#"\k<a"#).test_succeeds(r#"k<a"#);
tc.compile(r#"\k"#).test_succeeds(r#"k"#);
tc.compile(r#"(?<a>\a)"#).test_succeeds(r#"a"#);
tc.compile(r#"\k<a>(<a>x)"#).test_succeeds(r#"k<a><a>x"#);
tc.compile(r#"\k<a>\1"#).test_succeeds("k<a>\u{1}");
tc.compile(r#"\1(b)\k<a>"#).test_succeeds(r#"bk<a>"#);

// Negative parse tests in unicode mode.
test_parse_fails_flags(r#"\k<a>"#, "u");
test_parse_fails_flags(r#"\k<4>"#, "u");
test_parse_fails_flags(r#"\k<a"#, "u");
test_parse_fails_flags(r#"\k"#, "u");
test_parse_fails_flags(r#"(?<a>\a)"#, "u");
test_parse_fails_flags(r#"\k<a>(<a>x)"#, "u");
test_parse_fails_flags(r#"\k<a>\1"#, "u");
test_parse_fails_flags(r#"\1(b)\k<a>"#, "u");

// From 262 test/language/literals/regexp/named-groups/invalid-duplicate-groupspecifier.js
test_parse_fails(r#"(?<a>a)(?<a>a)"#);
Expand Down

0 comments on commit 649478b

Please sign in to comment.