From c506cf78e1d3f3493e089ab81e955038eeb1a35f Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Fri, 29 Sep 2023 16:40:55 +0200 Subject: [PATCH] fix(lint): handle malformed regexes --- .../no_empty_character_class_in_regex.rs | 10 +-- .../no_control_characters_in_regex.rs | 21 +++--- .../src/utils/assignment_like.rs | 3 +- crates/biome_js_syntax/src/expr_ext.rs | 64 +++++++++++++------ 4 files changed, 60 insertions(+), 38 deletions(-) diff --git a/crates/biome_js_analyze/src/analyzers/nursery/no_empty_character_class_in_regex.rs b/crates/biome_js_analyze/src/analyzers/nursery/no_empty_character_class_in_regex.rs index 2a8469c95813..d14d336491da 100644 --- a/crates/biome_js_analyze/src/analyzers/nursery/no_empty_character_class_in_regex.rs +++ b/crates/biome_js_analyze/src/analyzers/nursery/no_empty_character_class_in_regex.rs @@ -56,11 +56,11 @@ impl Rule for NoEmptyCharacterClassInRegex { fn run(ctx: &RuleContext<Self>) -> Self::Signals { let mut empty_classes = vec![]; let regex = ctx.query(); - let (Ok(regex_token), Ok(regex_flags)) = (regex.value_token(), regex.flags()) else { + let Ok((pattern, flags)) = regex.decompose() else { return empty_classes; }; - let has_v_flag = regex_flags.contains('v'); - let trimmed_text = regex_token.text_trimmed(); + let has_v_flag = flags.text().contains('v'); + let trimmed_text = pattern.text(); let mut class_start_index = None; let mut is_negated_class = false; let mut enumerated_char_iter = trimmed_text.chars().enumerate(); @@ -114,8 +114,8 @@ impl Rule for NoEmptyCharacterClassInRegex { RuleDiagnostic::new( rule_category!(), TextRange::new( - regex_token_range.start() + TextSize::from(empty_class_range.start as u32), - regex_token_range.start() + TextSize::from((empty_class_range.end + 1) as u32), + regex_token_range.start() + TextSize::from(empty_class_range.start as u32 + 1), + regex_token_range.start() + TextSize::from((empty_class_range.end + 2) as u32), ), markup! 
{ "The regular expression includes this "{maybe_negated}"empty character class""." diff --git a/crates/biome_js_analyze/src/analyzers/suspicious/no_control_characters_in_regex.rs b/crates/biome_js_analyze/src/analyzers/suspicious/no_control_characters_in_regex.rs index 82ee7ac688c3..556cdb768bc3 100644 --- a/crates/biome_js_analyze/src/analyzers/suspicious/no_control_characters_in_regex.rs +++ b/crates/biome_js_analyze/src/analyzers/suspicious/no_control_characters_in_regex.rs @@ -131,9 +131,9 @@ fn add_control_character_to_vec( /// - Unicode code point escapes range from `\u{0}` to `\u{1F}`. /// - The Unicode flag must be set as true in order for these Unicode code point escapes to work: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode. /// - Unescaped raw characters from U+0000 to U+001F. -fn collect_control_characters(pattern: String, flags: Option<String>) -> Option<Vec<String>> { +fn collect_control_characters(pattern: &str, flags: &str) -> Option<Vec<String>> { let mut control_characters: Vec<String> = Vec::new(); - let is_unicode_flag_set = flags.unwrap_or_default().contains('u'); + let is_unicode_flag_set = flags.contains('u'); let mut iter = pattern.chars().peekable(); while let Some(c) = iter.next() { @@ -190,9 +190,10 @@ fn collect_control_characters_from_expression( .next() .and_then(|arg| arg.ok()) .and_then(|arg| JsStringLiteralExpression::cast_ref(arg.syntax())) - .map(|js_string_literal| js_string_literal.text()); + .map(|js_string_literal| js_string_literal.text()) + .unwrap_or_default(); - return collect_control_characters(pattern, regexp_flags); + return collect_control_characters(&pattern, &regexp_flags); } None } @@ -219,10 +220,8 @@ impl Rule for NoControlCharactersInRegex { ) } RegexExpressionLike::JsRegexLiteralExpression(js_regex_literal_expression) => { - collect_control_characters( - js_regex_literal_expression.pattern().ok()?, - js_regex_literal_expression.flags().ok(), - ) + let (pattern, flags) = 
js_regex_literal_expression.decompose().ok()?; + collect_control_characters(pattern.text(), flags.text()) } } } @@ -249,7 +248,7 @@ mod tests { #[test] fn test_collect_control_characters() { assert_eq!( - collect_control_characters(String::from("\\x00\\x0F\\u0010\\u001F"), None), + collect_control_characters("\\x00\\x0F\\u0010\\u001F", ""), Some(vec![ String::from("\\x00"), String::from("\\x0F"), @@ -258,11 +257,11 @@ mod tests { ]) ); assert_eq!( - collect_control_characters(String::from("\\u{0}\\u{1F}"), Some(String::from("u"))), + collect_control_characters("\\u{0}\\u{1F}", "u"), Some(vec![String::from("\\u{0}"), String::from("\\u{1F}")]) ); assert_eq!( - collect_control_characters(String::from("\\x20\\u0020\\u{20}\\t\\n"), None), + collect_control_characters("\\x20\\u0020\\u{20}\\t\\n", ""), None ); } diff --git a/crates/biome_js_formatter/src/utils/assignment_like.rs b/crates/biome_js_formatter/src/utils/assignment_like.rs index ab0fe7f166f4..03c66f91c888 100644 --- a/crates/biome_js_formatter/src/utils/assignment_like.rs +++ b/crates/biome_js_formatter/src/utils/assignment_like.rs @@ -1176,7 +1176,8 @@ fn is_short_argument( } AnyJsExpression::AnyJsLiteralExpression(literal) => match literal { AnyJsLiteralExpression::JsRegexLiteralExpression(regex) => { - regex.pattern()?.chars().count() <= threshold as usize + let (pattern, _) = regex.decompose()?; + pattern.text().chars().count() <= threshold as usize } AnyJsLiteralExpression::JsStringLiteralExpression(string) => { string.value_token()?.text_trimmed().len() <= threshold as usize diff --git a/crates/biome_js_syntax/src/expr_ext.rs b/crates/biome_js_syntax/src/expr_ext.rs index 8a5118541735..320292675aaa 100644 --- a/crates/biome_js_syntax/src/expr_ext.rs +++ b/crates/biome_js_syntax/src/expr_ext.rs @@ -14,7 +14,7 @@ use crate::{ use crate::{JsPreUpdateExpression, JsSyntaxKind::*}; use biome_rowan::{ declare_node_union, AstNode, AstNodeList, AstSeparatedList, NodeOrToken, SyntaxResult, - TextRange, TokenText, 
+ TextRange, TextSize, TokenText, }; use core::iter; @@ -621,28 +621,50 @@ impl JsTemplateExpression { } impl JsRegexLiteralExpression { - pub fn pattern(&self) -> SyntaxResult<String> { - let token = self.value_token()?; - let text_trimmed = token.text_trimmed(); - - // SAFETY: a valid regex literal must have a end slash - let end_slash_pos = text_trimmed - .rfind('/') - .expect("regex literal must have an end slash"); - - Ok(String::from(&text_trimmed[1..end_slash_pos])) - } - - pub fn flags(&self) -> SyntaxResult<String> { + /// Decompose a regular expression into its pattern and flags. + /// + /// ``` + /// use biome_js_factory::make; + /// use biome_js_syntax::{JsSyntaxKind, JsSyntaxToken}; + /// + /// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/igu"), [], []); + /// let regex = make::js_regex_literal_expression(token); + /// let (pattern, flags) = regex.decompose().unwrap(); + /// assert_eq!(pattern.text(), "a+"); + /// assert_eq!(flags.text(), "igu"); + /// + /// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/"), [], []); + /// let regex = make::js_regex_literal_expression(token); + /// let (pattern, flags) = regex.decompose().unwrap(); + /// assert_eq!(pattern.text(), "a+"); + /// assert_eq!(flags.text(), ""); + /// + /// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+"), [], []); + /// let regex = make::js_regex_literal_expression(token); + /// let (pattern, flags) = regex.decompose().unwrap(); + /// assert_eq!(pattern.text(), "a+"); + /// assert_eq!(flags.text(), ""); + /// ``` + pub fn decompose(&self) -> SyntaxResult<(TokenText, TokenText)> { let token = self.value_token()?; let text_trimmed = token.text_trimmed(); - - // SAFETY: a valid regex literal must have a end slash - let end_slash_pos = text_trimmed - .rfind('/') - .expect("regex literal must have an end slash"); - - Ok(String::from(&text_trimmed[end_slash_pos..])) + let token_text = 
token.token_text_trimmed(); + let len = TextSize::from(text_trimmed.len() as u32); + let Some(end_slash_pos) = text_trimmed[1..].rfind('/').map(|x| x + 1) else { + return Ok(( + token_text + .clone() + .slice(TextRange::new(TextSize::from(1), len)), + token_text.slice(TextRange::empty(len)), + )); + }; + let end_slash_pos = end_slash_pos as u32; + let pattern = token_text.clone().slice(TextRange::new( + TextSize::from(1), + TextSize::from(end_slash_pos), + )); + let flags = token_text.slice(TextRange::new(TextSize::from(end_slash_pos + 1), len)); + Ok((pattern, flags)) } }