Skip to content

Commit

Permalink
fix(lint): handle malformed regexes
Browse files Browse the repository at this point in the history
  • Loading branch information
Conaclos committed Sep 29, 2023
1 parent 80fb2fc commit 69cf8b8
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 38 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ Read our [guidelines for writing a good changelog entry](https://github.com/biom

- Fix [#397](https://github.com/biomejs/biome/issues/397). [useNumericLiterals](https://biomejs.dev/linter/rules/use-numeric-literals) now provides correct code fixes for signed numbers. Contributed by @Conaclos

- Fix [#452](https://github.com/biomejs/biome/pull/452). The linter panicked when it encountered a malformed regex (a regex not ending with a slash).

### Parser

- Enhance diagnostic for infer type handling in the parser. The 'infer' keyword can only be utilized within the 'extends' clause of a conditional type. Using it outside of this context will result in an error. Ensure that any type declarations using 'infer' are correctly placed within the conditional type structure to avoid parsing issues. Contributed by @denbezrukov
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ impl Rule for NoEmptyCharacterClassInRegex {
fn run(ctx: &RuleContext<Self>) -> Self::Signals {
let mut empty_classes = vec![];
let regex = ctx.query();
let (Ok(regex_token), Ok(regex_flags)) = (regex.value_token(), regex.flags()) else {
let Ok((pattern, flags)) = regex.decompose() else {
return empty_classes;
};
let has_v_flag = regex_flags.contains('v');
let trimmed_text = regex_token.text_trimmed();
let has_v_flag = flags.text().contains('v');
let trimmed_text = pattern.text();
let mut class_start_index = None;
let mut is_negated_class = false;
let mut enumerated_char_iter = trimmed_text.chars().enumerate();
Expand Down Expand Up @@ -114,8 +114,8 @@ impl Rule for NoEmptyCharacterClassInRegex {
RuleDiagnostic::new(
rule_category!(),
TextRange::new(
regex_token_range.start() + TextSize::from(empty_class_range.start as u32),
regex_token_range.start() + TextSize::from((empty_class_range.end + 1) as u32),
regex_token_range.start() + TextSize::from(empty_class_range.start as u32 + 1),
regex_token_range.start() + TextSize::from((empty_class_range.end + 2) as u32),
),
markup! {
"The regular expression includes this "<Emphasis>{maybe_negated}"empty character class"</Emphasis>"."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ fn add_control_character_to_vec(
/// - Unicode code point escapes range from `\u{0}` to `\u{1F}`.
/// - The Unicode flag must be set as true in order for these Unicode code point escapes to work: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode.
/// - Unescaped raw characters from U+0000 to U+001F.
fn collect_control_characters(pattern: String, flags: Option<String>) -> Option<Vec<String>> {
fn collect_control_characters(pattern: &str, flags: &str) -> Option<Vec<String>> {
let mut control_characters: Vec<String> = Vec::new();
let is_unicode_flag_set = flags.unwrap_or_default().contains('u');
let is_unicode_flag_set = flags.contains('u');
let mut iter = pattern.chars().peekable();

while let Some(c) = iter.next() {
Expand Down Expand Up @@ -190,9 +190,10 @@ fn collect_control_characters_from_expression(
.next()
.and_then(|arg| arg.ok())
.and_then(|arg| JsStringLiteralExpression::cast_ref(arg.syntax()))
.map(|js_string_literal| js_string_literal.text());
.map(|js_string_literal| js_string_literal.text())
.unwrap_or_default();

return collect_control_characters(pattern, regexp_flags);
return collect_control_characters(&pattern, &regexp_flags);
}
None
}
Expand All @@ -219,10 +220,8 @@ impl Rule for NoControlCharactersInRegex {
)
}
RegexExpressionLike::JsRegexLiteralExpression(js_regex_literal_expression) => {
collect_control_characters(
js_regex_literal_expression.pattern().ok()?,
js_regex_literal_expression.flags().ok(),
)
let (pattern, flags) = js_regex_literal_expression.decompose().ok()?;
collect_control_characters(pattern.text(), flags.text())
}
}
}
Expand All @@ -249,7 +248,7 @@ mod tests {
#[test]
fn test_collect_control_characters() {
assert_eq!(
collect_control_characters(String::from("\\x00\\x0F\\u0010\\u001F"), None),
collect_control_characters("\\x00\\x0F\\u0010\\u001F", ""),
Some(vec![
String::from("\\x00"),
String::from("\\x0F"),
Expand All @@ -258,11 +257,11 @@ mod tests {
])
);
assert_eq!(
collect_control_characters(String::from("\\u{0}\\u{1F}"), Some(String::from("u"))),
collect_control_characters("\\u{0}\\u{1F}", "u"),
Some(vec![String::from("\\u{0}"), String::from("\\u{1F}")])
);
assert_eq!(
collect_control_characters(String::from("\\x20\\u0020\\u{20}\\t\\n"), None),
collect_control_characters("\\x20\\u0020\\u{20}\\t\\n", ""),
None
);
}
Expand Down
3 changes: 2 additions & 1 deletion crates/biome_js_formatter/src/utils/assignment_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,8 @@ fn is_short_argument(
}
AnyJsExpression::AnyJsLiteralExpression(literal) => match literal {
AnyJsLiteralExpression::JsRegexLiteralExpression(regex) => {
regex.pattern()?.chars().count() <= threshold as usize
let (pattern, _) = regex.decompose()?;
pattern.text().chars().count() <= threshold as usize
}
AnyJsLiteralExpression::JsStringLiteralExpression(string) => {
string.value_token()?.text_trimmed().len() <= threshold as usize
Expand Down
64 changes: 43 additions & 21 deletions crates/biome_js_syntax/src/expr_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
use crate::{JsPreUpdateExpression, JsSyntaxKind::*};
use biome_rowan::{
declare_node_union, AstNode, AstNodeList, AstSeparatedList, NodeOrToken, SyntaxResult,
TextRange, TokenText,
TextRange, TextSize, TokenText,
};
use core::iter;

Expand Down Expand Up @@ -621,28 +621,50 @@ impl JsTemplateExpression {
}

impl JsRegexLiteralExpression {
pub fn pattern(&self) -> SyntaxResult<String> {
let token = self.value_token()?;
let text_trimmed = token.text_trimmed();

// SAFETY: a valid regex literal must have a end slash
let end_slash_pos = text_trimmed
.rfind('/')
.expect("regex literal must have an end slash");

Ok(String::from(&text_trimmed[1..end_slash_pos]))
}

pub fn flags(&self) -> SyntaxResult<String> {
/// Decompose a regular expression into its pattern and flags.
///
/// ```
/// use biome_js_factory::make;
/// use biome_js_syntax::{JsSyntaxKind, JsSyntaxToken};
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/igu"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "igu");
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "");
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "");
/// ```
pub fn decompose(&self) -> SyntaxResult<(TokenText, TokenText)> {
let token = self.value_token()?;
let text_trimmed = token.text_trimmed();

// SAFETY: a valid regex literal must have a end slash
let end_slash_pos = text_trimmed
.rfind('/')
.expect("regex literal must have an end slash");

Ok(String::from(&text_trimmed[end_slash_pos..]))
let token_text = token.token_text_trimmed();
let len = TextSize::from(text_trimmed.len() as u32);
let Some(end_slash_pos) = text_trimmed[1..].rfind('/').map(|x| x + 1) else {
return Ok((
token_text
.clone()
.slice(TextRange::new(TextSize::from(1), len)),
token_text.slice(TextRange::empty(len)),
));
};
let end_slash_pos = end_slash_pos as u32;
let pattern = token_text.clone().slice(TextRange::new(
TextSize::from(1),
TextSize::from(end_slash_pos),
));
let flags = token_text.slice(TextRange::new(TextSize::from(end_slash_pos + 1), len));
Ok((pattern, flags))
}
}

Expand Down
2 changes: 2 additions & 0 deletions website/src/content/docs/internals/changelog.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ Read our [guidelines for writing a good changelog entry](https://github.com/biom

- Fix [#397](https://github.com/biomejs/biome/issues/397). [useNumericLiterals](https://biomejs.dev/linter/rules/use-numeric-literals) now provides correct code fixes for signed numbers. Contributed by @Conaclos

- Fix [#452](https://github.com/biomejs/biome/pull/452). The linter panicked when it encountered a malformed regex (a regex not ending with a slash).

### Parser

- Enhance diagnostic for infer type handling in the parser. The 'infer' keyword can only be utilized within the 'extends' clause of a conditional type. Using it outside of this context will result in an error. Ensure that any type declarations using 'infer' are correctly placed within the conditional type structure to avoid parsing issues. Contributed by @denbezrukov
Expand Down

0 comments on commit 69cf8b8

Please sign in to comment.