Conditionally parse named capture groups (#83)

ridiculousfish · Jan 28, 2024 · 649478b
1 parent 77f02b3
commit 649478b
Show file tree

Hide file tree

Showing 2 changed files with 45 additions and 20 deletions.
diff --git a/src/parse.rs b/src/parse.rs
@@ -950,7 +950,8 @@ where
                 }
             }
 
-            'k' => {
+            // [+NamedCaptureGroups] k GroupName
+            'k' if self.flags.unicode || !self.named_group_indices.is_empty() => {
                 self.consume('k');
 
                 // The sequence `\k` must be the start of a backreference to a named capture group.
@@ -967,6 +968,16 @@ where
                     error("Unexpected end of named backreference")
                 }
             }
+
+            // [~NamedCaptureGroups] k GroupName
+            'k' => {
+                self.consume('k');
+                Ok(ir::Node::Char {
+                    c: self.fold_if_icase(c),
+                    icase: self.flags.icase,
+                })
+            }
+
             _ => {
                 let c = self.consume_character_escape()?;
                 Ok(ir::Node::Char {

diff --git a/tests/tests.rs b/tests/tests.rs
@@ -1205,28 +1205,42 @@ fn run_regexp_named_capture_groups_tc(tc: TestConfig) {
 }
 
 #[test]
-#[rustfmt::skip]
-fn run_regexp_named_groups_unicode_malformed_tc() {
+fn run_regexp_named_groups_unicode_malformed() {
+    test_with_configs(run_regexp_named_groups_unicode_malformed_tc)
+}
+
+fn run_regexp_named_groups_unicode_malformed_tc(tc: TestConfig) {
     // From 262 test/annexB/built-ins/RegExp/named-groups/non-unicode-malformed-lookbehind.js
-    test_parse_fails(r#"\k<a>(?<=>)a"#);
-    test_parse_fails(r#"(?<=>)\k<a>"#);
-    test_parse_fails(r#"\k<a>(?<!a)a"#);
-    test_parse_fails(r#"(?<!a>)\k<a>"#);
+    tc.compile(r#"\k<a>(?<=>)a"#).test_succeeds(r#"k<a>a"#);
+    tc.compile(r#"(?<=>)\k<a>"#).test_succeeds(r#">k<a>"#);
+    tc.compile(r#"\k<a>(?<!a)a"#).test_succeeds(r#"k<a>a"#);
+    tc.compile(r#"(?<!a>)\k<a>"#).test_succeeds(r#"k<a>"#);
+
+    // Negative parse tests in unicode mode.
+    test_parse_fails_flags(r#"\k<a>(?<=>)a"#, "u");
+    test_parse_fails_flags(r#"(?<=>)\k<a>"#, "u");
+    test_parse_fails_flags(r#"\k<a>(?<!a)a"#, "u");
+    test_parse_fails_flags(r#"(?<!a>)\k<a>"#, "u");
 
     // From 262 test/annexB/built-ins/RegExp/named-groups/non-unicode-malformed.js
-    test_parse_fails(r#"\k<a>"#);
-    test_parse_fails(r#"\k<4>"#);
-    test_parse_fails(r#"\k<a"#);
-    test_parse_fails(r#"\k"#);
-
-    // TODO: This test fails, because we accept alphabetic ascii characters in otherwise invalid escapes, due to PCRE tests.
-    //test_parse_fails(r#"(?<a>\a)"#);
-
-    test_parse_fails(r#"\k<a>"#);
-    test_parse_fails(r#"\k<a"#);
-    test_parse_fails(r#"\k<a>(<a>x)"#);
-    test_parse_fails(r#"\k<a>\1"#);
-    test_parse_fails(r#"\1(b)\k<a>"#);
+    tc.compile(r#"\k<a>"#).test_succeeds(r#"k<a>"#);
+    tc.compile(r#"\k<4>"#).test_succeeds(r#"k<4>"#);
+    tc.compile(r#"\k<a"#).test_succeeds(r#"k<a"#);
+    tc.compile(r#"\k"#).test_succeeds(r#"k"#);
+    tc.compile(r#"(?<a>\a)"#).test_succeeds(r#"a"#);
+    tc.compile(r#"\k<a>(<a>x)"#).test_succeeds(r#"k<a><a>x"#);
+    tc.compile(r#"\k<a>\1"#).test_succeeds("k<a>\u{1}");
+    tc.compile(r#"\1(b)\k<a>"#).test_succeeds(r#"bk<a>"#);
+
+    // Negative parse tests in unicode mode.
+    test_parse_fails_flags(r#"\k<a>"#, "u");
+    test_parse_fails_flags(r#"\k<4>"#, "u");
+    test_parse_fails_flags(r#"\k<a"#, "u");
+    test_parse_fails_flags(r#"\k"#, "u");
+    test_parse_fails_flags(r#"(?<a>\a)"#, "u");
+    test_parse_fails_flags(r#"\k<a>(<a>x)"#, "u");
+    test_parse_fails_flags(r#"\k<a>\1"#, "u");
+    test_parse_fails_flags(r#"\1(b)\k<a>"#, "u");
 
     // From 262 test/language/literals/regexp/named-groups/invalid-duplicate-groupspecifier.js
     test_parse_fails(r#"(?<a>a)(?<a>a)"#);