diff --git a/CHANGELOG.md b/CHANGELOG.md index 107337103e97..26cb7bd7d5ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -104,6 +104,8 @@ Read our [guidelines for writing a good changelog entry](https://github.com/biom - Fix [#397](https://github.com/biomejs/biome/issues/397). [useNumericLiterals](https://biomejs.dev/linter/rules/use-numeric-literals) now provides correct code fixes for signed numbers. Contributed by @Conaclos +- Fix [452](https://github.com/biomejs/biome/pull/452). The linter panicked when it met a malformed regex (a regex not ending with a slash). + ### Parser - Enhance diagnostic for infer type handling in the parser. The 'infer' keyword can only be utilized within the 'extends' clause of a conditional type. Using it outside of this context will result in an error. Ensure that any type declarations using 'infer' are correctly placed within the conditional type structure to avoid parsing issues. Contributed by @denbezrukov diff --git a/crates/biome_js_analyze/src/analyzers/nursery/no_empty_character_class_in_regex.rs b/crates/biome_js_analyze/src/analyzers/nursery/no_empty_character_class_in_regex.rs index 2a8469c95813..d14d336491da 100644 --- a/crates/biome_js_analyze/src/analyzers/nursery/no_empty_character_class_in_regex.rs +++ b/crates/biome_js_analyze/src/analyzers/nursery/no_empty_character_class_in_regex.rs @@ -56,11 +56,11 @@ impl Rule for NoEmptyCharacterClassInRegex { fn run(ctx: &RuleContext<Self>) -> Self::Signals { let mut empty_classes = vec![]; let regex = ctx.query(); - let (Ok(regex_token), Ok(regex_flags)) = (regex.value_token(), regex.flags()) else { + let Ok((pattern, flags)) = regex.decompose() else { return empty_classes; }; - let has_v_flag = regex_flags.contains('v'); - let trimmed_text = regex_token.text_trimmed(); + let has_v_flag = flags.text().contains('v'); + let trimmed_text = pattern.text(); let mut class_start_index = None; let mut is_negated_class = false; let mut enumerated_char_iter = 
trimmed_text.chars().enumerate(); @@ -114,8 +114,8 @@ impl Rule for NoEmptyCharacterClassInRegex { RuleDiagnostic::new( rule_category!(), TextRange::new( - regex_token_range.start() + TextSize::from(empty_class_range.start as u32), - regex_token_range.start() + TextSize::from((empty_class_range.end + 1) as u32), + regex_token_range.start() + TextSize::from(empty_class_range.start as u32 + 1), + regex_token_range.start() + TextSize::from((empty_class_range.end + 2) as u32), ), markup! { "The regular expression includes this "{maybe_negated}"empty character class""." diff --git a/crates/biome_js_analyze/src/analyzers/suspicious/no_control_characters_in_regex.rs b/crates/biome_js_analyze/src/analyzers/suspicious/no_control_characters_in_regex.rs index 82ee7ac688c3..556cdb768bc3 100644 --- a/crates/biome_js_analyze/src/analyzers/suspicious/no_control_characters_in_regex.rs +++ b/crates/biome_js_analyze/src/analyzers/suspicious/no_control_characters_in_regex.rs @@ -131,9 +131,9 @@ fn add_control_character_to_vec( /// - Unicode code point escapes range from `\u{0}` to `\u{1F}`. /// - The Unicode flag must be set as true in order for these Unicode code point escapes to work: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode. /// - Unescaped raw characters from U+0000 to U+001F. 
-fn collect_control_characters(pattern: String, flags: Option<String>) -> Option<Vec<String>> { +fn collect_control_characters(pattern: &str, flags: &str) -> Option<Vec<String>> { let mut control_characters: Vec<String> = Vec::new(); - let is_unicode_flag_set = flags.unwrap_or_default().contains('u'); + let is_unicode_flag_set = flags.contains('u'); let mut iter = pattern.chars().peekable(); while let Some(c) = iter.next() { @@ -190,9 +190,10 @@ fn collect_control_characters_from_expression( .next() .and_then(|arg| arg.ok()) .and_then(|arg| JsStringLiteralExpression::cast_ref(arg.syntax())) - .map(|js_string_literal| js_string_literal.text()); + .map(|js_string_literal| js_string_literal.text()) + .unwrap_or_default(); - return collect_control_characters(pattern, regexp_flags); + return collect_control_characters(&pattern, &regexp_flags); } None } @@ -219,10 +220,8 @@ impl Rule for NoControlCharactersInRegex { ) } RegexExpressionLike::JsRegexLiteralExpression(js_regex_literal_expression) => { - collect_control_characters( - js_regex_literal_expression.pattern().ok()?, - js_regex_literal_expression.flags().ok(), - ) + let (pattern, flags) = js_regex_literal_expression.decompose().ok()?; + collect_control_characters(pattern.text(), flags.text()) } } } @@ -249,7 +248,7 @@ mod tests { #[test] fn test_collect_control_characters() { assert_eq!( - collect_control_characters(String::from("\\x00\\x0F\\u0010\\u001F"), None), + collect_control_characters("\\x00\\x0F\\u0010\\u001F", ""), Some(vec![ String::from("\\x00"), String::from("\\x0F"), @@ -258,11 +257,11 @@ mod tests { ]) ); assert_eq!( - collect_control_characters(String::from("\\u{0}\\u{1F}"), Some(String::from("u"))), + collect_control_characters("\\u{0}\\u{1F}", "u"), Some(vec![String::from("\\u{0}"), String::from("\\u{1F}")]) ); assert_eq!( - collect_control_characters(String::from("\\x20\\u0020\\u{20}\\t\\n"), None), + collect_control_characters("\\x20\\u0020\\u{20}\\t\\n", ""), None ); } diff --git a/crates/biome_js_formatter/src/utils/assignment_like.rs 
b/crates/biome_js_formatter/src/utils/assignment_like.rs index ab0fe7f166f4..03c66f91c888 100644 --- a/crates/biome_js_formatter/src/utils/assignment_like.rs +++ b/crates/biome_js_formatter/src/utils/assignment_like.rs @@ -1176,7 +1176,8 @@ fn is_short_argument( } AnyJsExpression::AnyJsLiteralExpression(literal) => match literal { AnyJsLiteralExpression::JsRegexLiteralExpression(regex) => { - regex.pattern()?.chars().count() <= threshold as usize + let (pattern, _) = regex.decompose()?; + pattern.text().chars().count() <= threshold as usize } AnyJsLiteralExpression::JsStringLiteralExpression(string) => { string.value_token()?.text_trimmed().len() <= threshold as usize diff --git a/crates/biome_js_syntax/src/expr_ext.rs b/crates/biome_js_syntax/src/expr_ext.rs index 8a5118541735..320292675aaa 100644 --- a/crates/biome_js_syntax/src/expr_ext.rs +++ b/crates/biome_js_syntax/src/expr_ext.rs @@ -14,7 +14,7 @@ use crate::{ use crate::{JsPreUpdateExpression, JsSyntaxKind::*}; use biome_rowan::{ declare_node_union, AstNode, AstNodeList, AstSeparatedList, NodeOrToken, SyntaxResult, - TextRange, TokenText, + TextRange, TextSize, TokenText, }; use core::iter; @@ -621,28 +621,50 @@ impl JsTemplateExpression { } impl JsRegexLiteralExpression { - pub fn pattern(&self) -> SyntaxResult<String> { - let token = self.value_token()?; - let text_trimmed = token.text_trimmed(); - - // SAFETY: a valid regex literal must have a end slash - let end_slash_pos = text_trimmed - .rfind('/') - .expect("regex literal must have an end slash"); - - Ok(String::from(&text_trimmed[1..end_slash_pos])) - } - - pub fn flags(&self) -> SyntaxResult<String> { + /// Decompose a regular expression into its pattern and flags. 
+ /// + /// ``` + /// use biome_js_factory::make; + /// use biome_js_syntax::{JsSyntaxKind, JsSyntaxToken}; + /// + /// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/igu"), [], []); + /// let regex = make::js_regex_literal_expression(token); + /// let (pattern, flags) = regex.decompose().unwrap(); + /// assert_eq!(pattern.text(), "a+"); + /// assert_eq!(flags.text(), "igu"); + /// + /// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/"), [], []); + /// let regex = make::js_regex_literal_expression(token); + /// let (pattern, flags) = regex.decompose().unwrap(); + /// assert_eq!(pattern.text(), "a+"); + /// assert_eq!(flags.text(), ""); + /// + /// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+"), [], []); + /// let regex = make::js_regex_literal_expression(token); + /// let (pattern, flags) = regex.decompose().unwrap(); + /// assert_eq!(pattern.text(), "a+"); + /// assert_eq!(flags.text(), ""); + /// ``` + pub fn decompose(&self) -> SyntaxResult<(TokenText, TokenText)> { let token = self.value_token()?; let text_trimmed = token.text_trimmed(); - - // SAFETY: a valid regex literal must have a end slash - let end_slash_pos = text_trimmed - .rfind('/') - .expect("regex literal must have an end slash"); - - Ok(String::from(&text_trimmed[end_slash_pos..])) + let token_text = token.token_text_trimmed(); + let len = TextSize::from(text_trimmed.len() as u32); + let Some(end_slash_pos) = text_trimmed[1..].rfind('/').map(|x| x + 1) else { + return Ok(( + token_text + .clone() + .slice(TextRange::new(TextSize::from(1), len)), + token_text.slice(TextRange::empty(len)), + )); + }; + let end_slash_pos = end_slash_pos as u32; + let pattern = token_text.clone().slice(TextRange::new( + TextSize::from(1), + TextSize::from(end_slash_pos), + )); + let flags = token_text.slice(TextRange::new(TextSize::from(end_slash_pos + 1), len)); + Ok((pattern, flags)) } } diff --git 
a/website/src/content/docs/internals/changelog.mdx b/website/src/content/docs/internals/changelog.mdx index a05fed43104e..312a524dbc2a 100644 --- a/website/src/content/docs/internals/changelog.mdx +++ b/website/src/content/docs/internals/changelog.mdx @@ -110,6 +110,8 @@ Read our [guidelines for writing a good changelog entry](https://github.com/biom - Fix [#397](https://github.com/biomejs/biome/issues/397). [useNumericLiterals](https://biomejs.dev/linter/rules/use-numeric-literals) now provides correct code fixes for signed numbers. Contributed by @Conaclos +- Fix [452](https://github.com/biomejs/biome/pull/452). The linter panicked when it met a malformed regex (a regex not ending with a slash). + ### Parser - Enhance diagnostic for infer type handling in the parser. The 'infer' keyword can only be utilized within the 'extends' clause of a conditional type. Using it outside of this context will result in an error. Ensure that any type declarations using 'infer' are correctly placed within the conditional type structure to avoid parsing issues. Contributed by @denbezrukov