From f83164dad785328e6bb531b7451e41c1b75f8a42 Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Sat, 22 Feb 2025 17:26:41 +0100 Subject: [PATCH] refactor(config): remove `overrides.include` --- .../src/execute/migrate/eslint_to_biome.rs | 12 +- .../biome_cli/src/execute/migrate/prettier.rs | 4 +- .../tests/commands/migrate_eslint.rs | 2 +- .../migrate_merge_with_overrides.snap | 6 +- .../biome_configuration/src/editorconfig.rs | 248 +--- crates/biome_configuration/src/lib.rs | 4 +- crates/biome_configuration/src/overrides.rs | 66 +- .../invalid/overrides/incorrect_key.json.snap | 2 - .../tests/valid/overrides/top_level_keys.json | 1 - crates/biome_glob/src/editorconfig.rs | 36 +- crates/biome_service/src/configuration.rs | 35 +- crates/biome_service/src/lib.rs | 2 - .../biome_service/src/matcher/LICENSE-APACHE | 201 ---- crates/biome_service/src/matcher/LICENSE-MIT | 25 - crates/biome_service/src/matcher/mod.rs | 250 ---- crates/biome_service/src/matcher/pattern.rs | 1057 ----------------- crates/biome_service/src/settings.rs | 54 +- e2e-tests/relative-apth-ignore-file/test.sh | 3 - .../biome.json | 0 .../file.js | 0 e2e-tests/relative-path-ignore-file/test.sh | 3 + .../@biomejs/backend-jsonrpc/src/workspace.ts | 7 +- .../@biomejs/biome/configuration_schema.json | 15 +- 23 files changed, 143 insertions(+), 1890 deletions(-) delete mode 100644 crates/biome_service/src/matcher/LICENSE-APACHE delete mode 100644 crates/biome_service/src/matcher/LICENSE-MIT delete mode 100644 crates/biome_service/src/matcher/mod.rs delete mode 100644 crates/biome_service/src/matcher/pattern.rs delete mode 100755 e2e-tests/relative-apth-ignore-file/test.sh rename e2e-tests/{relative-apth-ignore-file => relative-path-ignore-file}/biome.json (100%) rename e2e-tests/{relative-apth-ignore-file => relative-path-ignore-file}/file.js (100%) create mode 100755 e2e-tests/relative-path-ignore-file/test.sh diff --git a/crates/biome_cli/src/execute/migrate/eslint_to_biome.rs b/crates/biome_cli/src/execute/migrate/eslint_to_biome.rs index 5937009d2aee..66c9747dcd43 100644 --- a/crates/biome_cli/src/execute/migrate/eslint_to_biome.rs +++ b/crates/biome_cli/src/execute/migrate/eslint_to_biome.rs @@ -75,7 +75,8 @@ impl eslint_eslint::FlatConfigData { } let includes = to_biome_includes(&flat_config_object.files, &flat_config_object.ignores); - override_pat.includes = (!includes.is_empty()).then_some(includes); + override_pat.includes = (!includes.is_empty()) + .then_some(biome_configuration::OverrideGlobs::Globs(includes.into())); if let Some(rules) = flat_config_object.rules { if !rules.is_empty() { override_pat.linter = Some(biome_config::OverrideLinterConfiguration { @@ -155,7 +156,8 @@ impl eslint_eslint::LegacyConfigData { override_pattern.javascript = Some(js_config) } let includes = to_biome_includes(&override_elt.files, &override_elt.excluded_files); - override_pattern.includes = (!includes.is_empty()).then_some(includes); + override_pattern.includes = (!includes.is_empty()) + .then_some(biome_configuration::OverrideGlobs::Globs(includes.into())); if !override_elt.rules.is_empty() { override_pattern.linter = Some(biome_config::OverrideLinterConfiguration { rules: Some(override_elt.rules.into_biome_rules(options, &mut results)), @@ -355,6 +357,7 @@ fn to_biome_includes( #[cfg(test)] mod tests { use super::*; + use biome_configuration::OverrideGlobs; use eslint_eslint::*; use std::borrow::Cow; @@ -449,7 +452,10 @@ mod tests { let overrides = biome_config.overrides.unwrap(); assert_eq!(overrides.0.len(), 1); let 
override0 = overrides.0.into_iter().next().unwrap(); - assert_eq!(override0.includes.unwrap(), ["*.ts".parse().unwrap()],); + assert_eq!( + override0.includes.unwrap(), + OverrideGlobs::Globs(["*.ts".parse().unwrap()].into_iter().collect()), + ); assert_eq!( override0 .linter diff --git a/crates/biome_cli/src/execute/migrate/prettier.rs b/crates/biome_cli/src/execute/migrate/prettier.rs index 772bfc295845..a6a34d4cb64b 100644 --- a/crates/biome_cli/src/execute/migrate/prettier.rs +++ b/crates/biome_cli/src/execute/migrate/prettier.rs @@ -291,12 +291,12 @@ impl TryFrom<Override> for biome_configuration::OverridePattern { type Error = ParseFormatNumberError; fn try_from(Override { files, options }: Override) -> Result<Self, Self::Error> { let mut result = biome_configuration::OverridePattern { - includes: Some( + includes: Some(biome_configuration::OverrideGlobs::Globs( files .into_iter() .filter_map(|glob| glob.parse().ok()) .collect(), - ), + )), ..Default::default() }; if options.print_width.is_some() diff --git a/crates/biome_cli/tests/commands/migrate_eslint.rs b/crates/biome_cli/tests/commands/migrate_eslint.rs index 4c66afa2230a..addf904357ad 100644 --- a/crates/biome_cli/tests/commands/migrate_eslint.rs +++ b/crates/biome_cli/tests/commands/migrate_eslint.rs @@ -678,7 +678,7 @@ fn migrate_eslintrcjson_extended_rules() { fn migrate_merge_with_overrides() { let biomejson = r#"{ "overrides": [{ - "include": ["*.js"], + "includes": ["*.js"], "linter": { "enabled": false } }] }"#; diff --git a/crates/biome_cli/tests/snapshots/main_commands_migrate_eslint/migrate_merge_with_overrides.snap b/crates/biome_cli/tests/snapshots/main_commands_migrate_eslint/migrate_merge_with_overrides.snap index 534c8d6220eb..8cb14d582a73 100644 --- a/crates/biome_cli/tests/snapshots/main_commands_migrate_eslint/migrate_merge_with_overrides.snap +++ b/crates/biome_cli/tests/snapshots/main_commands_migrate_eslint/migrate_merge_with_overrides.snap @@ -8,7 +8,7 @@ expression: redactor(content) { "overrides": [ { - "include": ["*.js"], + "includes": ["*.js"], "linter": { "enabled": false } } ] @@ -53,13 +53,13 @@ biome.json migrate ━━━━━━━━━━━━━━━━━━━━ 1 1 │ { 2 │ - ········"overrides":·[{ - 3 │ - ············"include":·["*.js"], + 3 │ - ············"includes":·["*.js"], 4 │ - ············"linter":·{·"enabled":·false·} 5 │ - ········}] 6 │ - ····} 2 │ + → "linter":·{·"rules":·{·"recommended":·false·},·"includes":·["**"]·}, 3 │ + → "overrides":·[ - 4 │ + → → {·"include":·["*.js"],·"linter":·{·"enabled":·false·}·}, + 4 │ + → → {·"includes":·["*.js"],·"linter":·{·"enabled":·false·}·}, 5 │ + → → { 6 │ + → → → "includes":·["bin/*.js",·"lib/*.js",·"!*.test.js"], 7 │ + → → → "linter":·{·"rules":·{·"suspicious":·{·"noDoubleEquals":·"off"·}·}·} diff --git a/crates/biome_configuration/src/editorconfig.rs b/crates/biome_configuration/src/editorconfig.rs index 885e440372cb..3226422deea9 100644 --- a/crates/biome_configuration/src/editorconfig.rs +++ b/crates/biome_configuration/src/editorconfig.rs @@ -17,8 +17,8 @@ use serde::{Deserialize, Deserializer}; use crate::{ diagnostics::{EditorConfigDiagnostic, ParseFailedDiagnostic}, - Configuration, FormatterConfiguration, OverrideFormatterConfiguration, OverridePattern, - Overrides, + Configuration, FormatterConfiguration, OverrideFormatterConfiguration, OverrideGlobs, + OverridePattern, Overrides, }; pub fn parse_str(s: &str) -> Result<EditorConfig, EditorConfigDiagnostic> { @@ -48,28 +48,20 @@ impl EditorConfig { formatter: self.options.remove("*").map(|o| o.to_biome()), ..Default::default() }; - let mut errors = vec![]; let overrides: Vec<_> = 
self .options .into_iter() - .map(|(k, v)| { - let patterns = match expand_unknown_glob_patterns(&k) { - Ok(patterns) => patterns - .into_iter() - .map(hack_convert_double_star) - .map(String::into_boxed_str) - .collect(), - Err(err) => { - errors.push(err); - vec![k.into_boxed_str()] - } - }; - - OverridePattern { - include: Some(patterns), - formatter: Some(v.to_biome_override()), - ..Default::default() - } + .filter_map(|(k, v)| { + // Ignore glob patterns that cannot be parsed + Some(( + biome_glob::editorconfig::EditorconfigGlob::try_from(k).ok()?, + v, + )) + }) + .map(|(glob, v)| OverridePattern { + includes: Some(OverrideGlobs::EditorconfigGlob(Box::new(glob))), + formatter: Some(v.to_biome_override()), + ..Default::default() }) .collect(); config.overrides = Some(Overrides(overrides)); @@ -184,160 +176,6 @@ where } } -/// Turn an unknown glob pattern into a list of known glob patterns. This is part of a hack to support all editorconfig patterns. -/// -/// TODO: remove in biome 2.0 -fn expand_unknown_glob_patterns(pattern: &str) -> Result<Vec<Box<str>>, EditorConfigDiagnostic> { - struct Variants { - /// index of the { character - start: usize, - /// index of the } character - end: usize, - variants: Option<VariantType>, - } - - impl Variants { - fn new(start: usize) -> Self { - Self { - start, - end: start, - variants: None, - } - } - - fn parse_to_variants(&mut self, s: &str) -> Result<(), EditorConfigDiagnostic> { - let s = s.trim_start_matches('{').trim_end_matches('}'); - if s.contains("..") { - let mut parts = s.split(".."); - let start = parts.next().ok_or_else(|| { - EditorConfigDiagnostic::invalid_glob_pattern( - s, - "Range pattern must have exactly two parts", - ) - })?; - let end = parts.next().ok_or_else(|| { - EditorConfigDiagnostic::invalid_glob_pattern( - s, - "Range pattern must have exactly two parts", - ) - })?; - if parts.next().is_some() { - return Err(EditorConfigDiagnostic::invalid_glob_pattern( - s, - "Range pattern must have exactly two parts", - )); - } - - let start = start.parse().map_err(|err| { - EditorConfigDiagnostic::invalid_glob_pattern( - s, - format!("Error parsing the start of the range: {err}"), - ) - })?; - let end = end.parse().map_err(|err| { - EditorConfigDiagnostic::invalid_glob_pattern( - s, - format!("Error parsing the end of the range: {err}"), - ) - })?; - self.variants = Some(VariantType::Range((start, end))); - } else { - self.variants = Some(VariantType::List( - s.split(',').map(|s| s.to_string()).collect(), - )); - } - - Ok(()) - } - - fn variants(&self) -> Vec<String> { - match &self.variants { - Some(VariantType::List(ref list)) => list.clone(), - Some(VariantType::Range((start, end))) => { - let mut variants = vec![]; - for i in *start..=*end { - variants.push(i.to_string()); - } - variants - } - None => vec![], - } - } - } - - enum VariantType { - List(Vec<String>), - Range((i64, i64)), - } - - let mut all_variants = vec![]; - let mut current_variants = None; - for (index, byte) in pattern.bytes().enumerate() { - match byte { - b'{' => { - if current_variants.is_none() { - current_variants = Some(Variants::new(index)); - } else { - // TODO: error, recursive brace expansion is not supported - } - } - b'}' => { - if let Some(mut v) = current_variants.take() { - v.end = index; - v.parse_to_variants(&pattern[v.start..=v.end])?; - all_variants.push(v); - } - } - _ => {} - } - } - - if all_variants.is_empty() { - return Ok(vec![pattern.to_string().into_boxed_str()]); - } - - let mut expanded_patterns = Vec::new(); - for variants in all_variants.iter().rev() { - if 
expanded_patterns.is_empty() { - for variant in &variants.variants() { - let mut pattern = pattern.to_string(); - pattern.replace_range(variants.start..=variants.end, variant); - expanded_patterns.push(pattern.into_boxed_str()); - } - } else { - let mut new_patterns = Vec::new(); - for existing in &expanded_patterns { - for variant in &variants.variants() { - let mut pattern = existing.to_string(); - pattern.replace_range(variants.start..=variants.end, variant); - new_patterns.push(pattern.into_boxed_str()); - } - } - expanded_patterns = new_patterns; - } - } - - Ok(expanded_patterns) -} - -/// The EditorConfig spec allows for patterns like `**.yml`, which is not supported by biome. This function corrects such patterns so that they can be parsed by biome's glob parser. -fn hack_convert_double_star(pattern: impl AsRef) -> String { - pattern - .as_ref() - .split('/') - .map(|component| { - if component == "**" { - component.to_string() - } else if component.contains("**") { - component.replace("**", "**/*") - } else { - component.to_string() - } - }) - .collect::>() - .join("/") -} - #[cfg(test)] mod tests { use super::*; @@ -448,64 +286,4 @@ insert_final_newline = unset EditorconfigValue::Default )); } - - #[test] - fn should_expand_glob_pattern_list() { - let pattern = "package.json"; - let mut expanded = - expand_unknown_glob_patterns(pattern).expect("Failed to expand glob pattern"); - expanded.sort(); - assert_eq!(expanded, ["package.json".into()]); - - let pattern = "{package.json,.travis.yml}"; - let mut expanded = - expand_unknown_glob_patterns(pattern).expect("Failed to expand glob pattern"); - expanded.sort(); - assert_eq!(expanded, [".travis.yml".into(), "package.json".into()]); - } - - #[test] - fn should_expand_glob_pattern_list_2() { - let pattern = "**/{foo,bar}.{test,spec}.js"; - let mut expanded = - expand_unknown_glob_patterns(pattern).expect("Failed to expand glob pattern"); - expanded.sort(); - assert_eq!( - expanded, - [ - "**/bar.spec.js".into(), - "**/bar.test.js".into(), - "**/foo.spec.js".into(), - "**/foo.test.js".into(), - ] - ); - } - - #[test] - fn should_expand_glob_pattern_range() { - let pattern = "**/bar.{1..4}.js"; - let mut expanded = - expand_unknown_glob_patterns(pattern).expect("Failed to expand glob pattern"); - expanded.sort(); - assert_eq!( - expanded, - [ - "**/bar.1.js".into(), - "**/bar.2.js".into(), - "**/bar.3.js".into(), - "**/bar.4.js".into() - ] - ); - } - - #[test] - fn should_correct_double_star() { - let pattern = "**.yml"; - let corrected = hack_convert_double_star(pattern); - assert_eq!(corrected, "**/*.yml",); - - let pattern = "**/*.yml"; - let corrected = hack_convert_double_star(pattern); - assert_eq!(corrected, "**/*.yml",); - } } diff --git a/crates/biome_configuration/src/lib.rs b/crates/biome_configuration/src/lib.rs index 3da5407eda79..fbdc82835635 100644 --- a/crates/biome_configuration/src/lib.rs +++ b/crates/biome_configuration/src/lib.rs @@ -49,8 +49,8 @@ pub use html::{html_configuration, HtmlConfiguration}; pub use javascript::{js_configuration, JsConfiguration}; pub use json::{json_configuration, JsonConfiguration}; pub use overrides::{ - OverrideAssistConfiguration, OverrideFormatterConfiguration, OverrideLinterConfiguration, - OverridePattern, Overrides, + OverrideAssistConfiguration, OverrideFormatterConfiguration, OverrideGlobs, + OverrideLinterConfiguration, OverridePattern, Overrides, }; use plugins::Plugins; use serde::{Deserialize, Serialize}; diff --git a/crates/biome_configuration/src/overrides.rs 
b/crates/biome_configuration/src/overrides.rs index 13c08cd68c59..830a20894645 100644 --- a/crates/biome_configuration/src/overrides.rs +++ b/crates/biome_configuration/src/overrides.rs @@ -15,34 +15,20 @@ use biome_formatter::{ use bpaf::Bpaf; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; -use std::str::FromStr; #[derive(Clone, Debug, Default, Deserialize, Deserializable, Eq, Merge, PartialEq, Serialize)] #[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct Overrides(pub Vec<OverridePattern>); -impl FromStr for Overrides { - type Err = String; - - fn from_str(_s: &str) -> Result<Self, Self::Err> { - Ok(Self::default()) - } -} - -#[derive(Clone, Debug, Default, Deserialize, Deserializable, Eq, Merge, PartialEq, Serialize)] +#[derive(Clone, Debug, Default, Deserialize, Deserializable, Eq, PartialEq, Serialize)] #[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] #[serde(rename_all = "camelCase", default, deny_unknown_fields)] pub struct OverridePattern { - /// A list of Unix shell style patterns. Biome will include files/folders that will - /// match these patterns. - #[serde(skip_serializing_if = "Option::is_none")] - pub include: Option<Vec<Box<str>>>, - /// A list of glob patterns. Biome will include files/folders that will /// match these patterns. #[serde(skip_serializing_if = "Option::is_none")] - pub includes: Option<Vec<biome_glob::Glob>>, + pub includes: Option<OverrideGlobs>, /// Specific configuration for the JavaScript language #[serde(skip_serializing_if = "Option::is_none")] @@ -81,17 +67,41 @@ pub struct OverridePattern { pub assist: Option<OverrideAssistConfiguration>, } -impl FromStr for OverridePattern { - type Err = String; - - fn from_str(_s: &str) -> Result<Self, Self::Err> { - Ok(Self::default()) +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +#[serde(untagged)] +pub enum OverrideGlobs { + Globs(Box<[biome_glob::Glob]>), + EditorconfigGlob(Box<biome_glob::editorconfig::EditorconfigGlob>), +} +impl OverrideGlobs { + /// Normalize `path` and match it against the list of globs. 
+ pub fn is_match_candidate(&self, path: &biome_glob::CandidatePath) -> bool { + match self { + OverrideGlobs::Globs(globs) => path.matches_with_exceptions(globs), + OverrideGlobs::EditorconfigGlob(glob) => glob.is_match_candidate(path), + } + } +} +impl biome_deserialize::Deserializable for OverrideGlobs { + fn deserialize( + ctx: &mut impl biome_deserialize::DeserializationContext, + value: &impl biome_deserialize::DeserializableValue, + name: &str, + ) -> Option<Self> { + biome_deserialize::Deserializable::deserialize(ctx, value, name).map(OverrideGlobs::Globs) + } +} +#[cfg(feature = "schema")] +impl schemars::JsonSchema for OverrideGlobs { + fn schema_name() -> String { + "OverrideGlobs".to_string() + } + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + Vec::<biome_glob::Glob>::json_schema(gen) + } } -#[derive( - Bpaf, Clone, Debug, Default, Deserialize, Deserializable, Eq, Merge, PartialEq, Serialize, -)] +#[derive(Bpaf, Clone, Debug, Default, Deserialize, Deserializable, Eq, PartialEq, Serialize)] #[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] #[serde(rename_all = "camelCase", default, deny_unknown_fields)] pub struct OverrideFormatterConfiguration { @@ -148,9 +158,7 @@ pub struct OverrideFormatterConfiguration { pub bracket_spacing: Option, } -#[derive( - Bpaf, Clone, Debug, Default, Deserialize, Deserializable, Eq, Merge, PartialEq, Serialize, -)] +#[derive(Bpaf, Clone, Debug, Default, Deserialize, Deserializable, Eq, PartialEq, Serialize)] #[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] #[serde(rename_all = "camelCase", default, deny_unknown_fields)] pub struct OverrideLinterConfiguration { @@ -170,9 +178,7 @@ pub struct OverrideLinterConfiguration { pub domains: Option>, } -#[derive( - Bpaf, Clone, Debug, Default, Deserialize, Deserializable, Eq, Merge, PartialEq, Serialize, -)] +#[derive(Bpaf, Clone, Debug, Default, Deserialize, Deserializable, Eq, PartialEq, Serialize)] #[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] #[serde(rename_all = "camelCase", default, deny_unknown_fields)] pub struct OverrideAssistConfiguration { diff --git a/crates/biome_configuration/tests/invalid/overrides/incorrect_key.json.snap b/crates/biome_configuration/tests/invalid/overrides/incorrect_key.json.snap index b1787223a3f8..3e0481cd86cc 100644 --- a/crates/biome_configuration/tests/invalid/overrides/incorrect_key.json.snap +++ b/crates/biome_configuration/tests/invalid/overrides/incorrect_key.json.snap @@ -1,6 +1,5 @@ --- source: crates/biome_configuration/tests/spec_tests.rs -assertion_line: 58 expression: incorrect_key.json --- incorrect_key.json:4:4 deserialize ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ @@ -16,7 +15,6 @@ incorrect_key.json:4:4 deserialize ━━━━━━━━━━━━━━━ i Known keys: - - include - includes - javascript - json diff --git a/crates/biome_configuration/tests/valid/overrides/top_level_keys.json b/crates/biome_configuration/tests/valid/overrides/top_level_keys.json index fb4f83ce3b67..e3fe4ff94e75 100644 --- a/crates/biome_configuration/tests/valid/overrides/top_level_keys.json +++ b/crates/biome_configuration/tests/valid/overrides/top_level_keys.json @@ -1,7 +1,6 @@ { "overrides": [ { - "include": [], "includes": [], "javascript": {}, "json": {}, diff --git a/crates/biome_glob/src/editorconfig.rs b/crates/biome_glob/src/editorconfig.rs index 11e437fa557d..d99f7eade8c5 100644 --- a/crates/biome_glob/src/editorconfig.rs +++ b/crates/biome_glob/src/editorconfig.rs @@ -233,6 +233,7 @@ fn convert_glob(mut 
pattern: String) -> Result { } let mut it = pattern.bytes().enumerate(); let mut changes = Vec::new(); + let mut implicit_globstar_prefix = true; let mut prev_is_slash = true; let mut alternation_start = None; let mut alternation_has_coma = false; @@ -248,12 +249,14 @@ fn convert_glob(mut pattern: String) -> Result { } } b'/' => { + implicit_globstar_prefix = false; prev_is_slash = true; continue; } b'*' => { let mut lookahead = it.clone(); if matches!(lookahead.next(), Some((_, b'*'))) { + implicit_globstar_prefix = false; if !prev_is_slash { changes.push(StrOp::Insert { index: i, @@ -381,6 +384,9 @@ fn convert_glob(mut pattern: String) -> Result { } } } + if implicit_globstar_prefix && !pattern.is_empty() && !pattern.starts_with("**") { + pattern.insert_str(0, "**/"); + } Ok(pattern) } @@ -408,28 +414,28 @@ mod tests { assert_eq!(convert_valid_glob("**a"), "**/*a"); assert_eq!(convert_valid_glob("***"), "**/*"); - assert_eq!(convert_valid_glob(r"[\]a]"), r"[\]a]"); + assert_eq!(convert_valid_glob(r"[\]a]"), r"**/[\]a]"); assert_eq!(convert_valid_glob("[a/b]"), r"\[a/b]"); - assert_eq!(convert_valid_glob("{}"), r"\{\}"); + assert_eq!(convert_valid_glob("{}"), r"**/\{\}"); assert_eq!(convert_valid_glob("[a/b]{}"), r"\[a/b]\{\}"); - assert_eq!(convert_valid_glob("{a}"), r"\{a\}"); - assert_eq!(convert_valid_glob("{a,b"), r"\{a,b"); + assert_eq!(convert_valid_glob("{a}"), r"**/\{a\}"); + assert_eq!(convert_valid_glob("{a,b"), r"**/\{a,b"); - assert_eq!(convert_valid_glob("{a,b}"), "{a,b}"); - assert_eq!(convert_valid_glob("{0,1}"), "{0,1}"); - assert_eq!(convert_valid_glob("{a,0..1}"), "{a,0..1}"); + assert_eq!(convert_valid_glob("{a,b}"), "**/{a,b}"); + assert_eq!(convert_valid_glob("{0,1}"), "**/{0,1}"); + assert_eq!(convert_valid_glob("{a,0..1}"), "**/{a,0..1}"); - assert_eq!(convert_valid_glob("{0..1}"), "{0,1}"); - assert_eq!(convert_valid_glob("{0..9}"), "{0,1,2,3,4,5,6,7,8,9}"); - assert_eq!(convert_valid_glob("{+1..+8}"), "{1,2,3,4,5,6,7,8}"); - assert_eq!(convert_valid_glob("{0..0}"), "{0}"); + assert_eq!(convert_valid_glob("{0..1}"), "**/{0,1}"); + assert_eq!(convert_valid_glob("{0..9}"), "**/{0,1,2,3,4,5,6,7,8,9}"); + assert_eq!(convert_valid_glob("{+1..+8}"), "**/{1,2,3,4,5,6,7,8}"); + assert_eq!(convert_valid_glob("{0..0}"), "**/{0}"); - assert_eq!(convert_valid_glob("{10..12}"), "{10,11,12}"); - assert_eq!(convert_valid_glob("{10..10}"), "{10}"); + assert_eq!(convert_valid_glob("{10..12}"), "**/{10,11,12}"); + assert_eq!(convert_valid_glob("{10..10}"), "**/{10}"); - assert_eq!(convert_valid_glob(r"\{0..0}"), r"\{0..0\}"); - assert_eq!(convert_valid_glob("{a..b}"), r"\{a..b\}"); + assert_eq!(convert_valid_glob(r"\{0..0}"), r"**/\{0..0\}"); + assert_eq!(convert_valid_glob("{a..b}"), r"**/\{a..b\}"); } // Editorconfig glob tests are ported from https://github.com/editorconfig/editorconfig-core-test/tree/master/glob diff --git a/crates/biome_service/src/configuration.rs b/crates/biome_service/src/configuration.rs index 4198d36711e3..892b63c9224f 100644 --- a/crates/biome_service/src/configuration.rs +++ b/crates/biome_service/src/configuration.rs @@ -1,4 +1,3 @@ -use crate::matcher::Pattern; use crate::settings::Settings; use crate::WorkspaceError; use biome_analyze::AnalyzerRules; @@ -299,39 +298,9 @@ pub fn load_editorconfig( // How .editorconfig is supposed to be resolved: https://editorconfig.org/#file-location // We currently don't support the `root` property, so we just search for the file like we do for biome.json if let Some(auto_search_result) = 
fs.auto_search_file(&workspace_root, ".editorconfig") { - let AutoSearchResult { - content, - file_path: path, - .. - } = auto_search_result; + let AutoSearchResult { content, .. } = auto_search_result; let editorconfig = biome_configuration::editorconfig::parse_str(&content)?; - let config = editorconfig.to_biome(); - - let patterns = config - .0 - .as_ref() - .and_then(|c| c.overrides.as_ref()) - .map(|overrides| { - overrides - .0 - .iter() - .flat_map(|override_pattern| override_pattern.include.iter().flatten()) - }); - - if let Some(patterns) = patterns { - for pattern in patterns { - if let Err(err) = Pattern::new(pattern) { - return Err(BiomeDiagnostic::new_invalid_ignore_pattern_with_path( - pattern, - err.to_string(), - path.as_str(), - ) - .into()); - } - } - } - - Ok(config) + Ok(editorconfig.to_biome()) } else { Ok((None, vec![])) } diff --git a/crates/biome_service/src/lib.rs b/crates/biome_service/src/lib.rs index 1ae2c352a94d..57d26b1d9ec5 100644 --- a/crates/biome_service/src/lib.rs +++ b/crates/biome_service/src/lib.rs @@ -1,7 +1,6 @@ pub mod documentation; pub mod file_handlers; -pub mod matcher; pub mod projects; pub mod settings; pub mod workspace; @@ -21,7 +20,6 @@ use biome_fs::{FileSystem, OsFileSystem}; pub use diagnostics::{extension_error, TransportError, WorkspaceError}; pub use file_handlers::JsFormatterSettings; -pub use matcher::Matcher; pub use workspace::Workspace; /// This is the main entrypoint of the application. diff --git a/crates/biome_service/src/matcher/LICENSE-APACHE b/crates/biome_service/src/matcher/LICENSE-APACHE deleted file mode 100644 index cd7ef7e8fb62..000000000000 --- a/crates/biome_service/src/matcher/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright (c) 2023 Biome Developers and Contributors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/crates/biome_service/src/matcher/LICENSE-MIT b/crates/biome_service/src/matcher/LICENSE-MIT deleted file mode 100644 index 39d4bdb5acd3..000000000000 --- a/crates/biome_service/src/matcher/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2014 The Rust Project Developers - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/crates/biome_service/src/matcher/mod.rs b/crates/biome_service/src/matcher/mod.rs deleted file mode 100644 index dffd9d003417..000000000000 --- a/crates/biome_service/src/matcher/mod.rs +++ /dev/null @@ -1,250 +0,0 @@ -pub mod pattern; - -use crate::WorkspaceError; -use biome_configuration::BiomeDiagnostic; -use biome_console::markup; -use biome_diagnostics::Diagnostic; -use camino::{Utf8Path, Utf8PathBuf}; -use papaya::HashMap; -pub use pattern::{MatchOptions, Pattern, PatternError}; -use rustc_hash::FxBuildHasher; -use std::sync::Arc; - -/// A data structure to use when there's need to match a string or a path a against -/// a unix shell style patterns -#[derive(Clone, Debug, Default)] -pub struct Matcher(Arc); - -impl Matcher { - pub fn empty() -> Self { - Self::default() - } - - /// Creates a [Matcher] from a set of globs. - /// - /// ## Errors - /// - /// It can raise an error if the patterns aren't valid - pub fn from_globs( - working_directory: Option, - globs: Option<&[Box]>, - ) -> Result { - let mut matcher = Inner::default(); - if let Some(working_directory) = working_directory { - matcher.set_root(working_directory) - } - if let Some(string_set) = globs { - for pattern in string_set { - matcher.add_pattern(pattern).map_err(|err| { - BiomeDiagnostic::new_invalid_ignore_pattern( - pattern.to_string(), - err.msg.to_string(), - ) - })?; - } - } - Ok(Self(Arc::new(matcher))) - } - - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - - /// Matches the given string against the stored patterns. - /// - /// Returns [true] if there's at least one match. - pub fn matches(&self, source: &str) -> bool { - self.0.matches(source) - } - - /// Matches the given path against the stored patterns. - /// - /// Returns [true] if there's at least one match. - pub fn matches_path(&self, source: &Utf8Path) -> bool { - self.0.matches_path(source) - } -} - -#[derive(Clone, Debug, Default)] -struct Inner { - root: Option, - patterns: Vec, - /// Check [glob website](https://docs.rs/glob/latest/glob/struct.MatchOptions.html) for [MatchOptions] - options: MatchOptions, - /// Cached results for matches. - already_checked: HashMap, -} - -impl Inner { - /// Creates a new Matcher with given options. - #[cfg(test)] - fn new(options: MatchOptions) -> Self { - Self { - root: None, - patterns: Vec::new(), - options, - already_checked: HashMap::default(), - } - } - - fn set_root(&mut self, root: Utf8PathBuf) { - self.root = Some(root); - } - - /// It adds a unix shell style pattern - fn add_pattern(&mut self, pattern: &str) -> Result<(), PatternError> { - let pattern = Pattern::new(pattern)?; - self.patterns.push(pattern); - Ok(()) - } - - /// It matches the given string against the stored patterns. 
- /// - /// It returns [true] if there's at least a match - fn matches(&self, source: &str) -> bool { - let already_checked = self.already_checked.pin(); - if let Some(matches) = already_checked.get(source) { - return *matches; - } - for pattern in &self.patterns { - if pattern.matches_with(source, self.options) || source.contains(pattern.as_str()) { - already_checked.insert(source.to_string(), true); - return true; - } - } - already_checked.insert(source.to_string(), false); - false - } - - fn is_empty(&self) -> bool { - self.patterns.is_empty() - } - - /// It matches the given path against the stored patterns - /// - /// It returns [true] if there's at least one match - fn matches_path(&self, source: &Utf8Path) -> bool { - if self.is_empty() { - return false; - } - let already_checked = self.already_checked.pin(); - let source_as_string = source.as_str(); - if let Some(matches) = already_checked.get(source_as_string) { - return *matches; - } - let matches = self.run_match(source); - - already_checked.insert(source_as_string.to_string(), matches); - - matches - } - - fn run_match(&self, source: &Utf8Path) -> bool { - for pattern in &self.patterns { - let matches = if pattern.matches_path_with(source, self.options) { - true - } else { - // Here we cover cases where the user specifies single files inside the patterns. - // The pattern library doesn't support single files, we here we just do a check - // on contains - // - // Given the pattern `out`: - // - `out/index.html` -> matches - // - `out/` -> matches - // - `layout.tsx` -> does not match - // - `routes/foo.ts` -> does not match - source - .ancestors() - .any(|ancestor| ancestor.ends_with(pattern.as_str())) - }; - - if matches { - return true; - } - } - false - } -} - -impl Diagnostic for PatternError { - fn description(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(fmt, "{}", self.msg) - } - - fn message(&self, fmt: &mut biome_console::fmt::Formatter<'_>) -> std::io::Result<()> { - fmt.write_markup(markup!({ self.msg })) - } -} - -#[cfg(test)] -mod test { - use crate::matcher::pattern::MatchOptions; - use crate::matcher::Inner; - use camino::Utf8PathBuf; - use std::env; - - #[test] - fn matches() { - let current = env::current_dir().unwrap(); - let dir = format!("{}/**/*.rs", current.display()); - let mut ignore = Inner::new(MatchOptions::default()); - ignore.add_pattern(&dir).unwrap(); - let path = env::current_dir().unwrap().join("src/workspace.rs"); - let result = ignore.matches(path.to_str().unwrap()); - - assert!(result); - } - - #[test] - fn matches_path() { - let current = env::current_dir().unwrap(); - let dir = format!("{}/**/*.rs", current.display()); - let mut ignore = Inner::new(MatchOptions::default()); - ignore.add_pattern(&dir).unwrap(); - let path = Utf8PathBuf::from_path_buf(env::current_dir().unwrap()) - .unwrap() - .join("src/workspace.rs"); - let result = ignore.matches_path(path.as_path()); - - assert!(result); - } - - #[test] - fn matches_path_for_single_file_or_directory_name() { - let dir = "inv"; - let valid_test_dir = "valid/"; - let mut ignore = Inner::new(MatchOptions::default()); - ignore.add_pattern(dir).unwrap(); - ignore.add_pattern(valid_test_dir).unwrap(); - let path = Utf8PathBuf::from_path_buf(env::current_dir().unwrap()) - .unwrap() - .join("tests") - .join("invalid"); - let result = ignore.matches_path(path.as_path()); - - assert!(!result); - - let path = Utf8PathBuf::from_path_buf(env::current_dir().unwrap()) - .unwrap() - .join("tests") - .join("valid"); - let result 
= ignore.matches_path(path.as_path()); - - assert!(result); - } - - #[test] - fn matches_single_path() { - let dir = "workspace.rs"; - let mut ignore = Inner::new(MatchOptions { - require_literal_separator: true, - case_sensitive: true, - require_literal_leading_dot: true, - }); - ignore.add_pattern(dir).unwrap(); - let path = env::current_dir().unwrap().join("src/workspace.rs"); - let result = ignore.matches(path.to_str().unwrap()); - - assert!(result); - } -} diff --git a/crates/biome_service/src/matcher/pattern.rs b/crates/biome_service/src/matcher/pattern.rs deleted file mode 100644 index 45c882582474..000000000000 --- a/crates/biome_service/src/matcher/pattern.rs +++ /dev/null @@ -1,1057 +0,0 @@ -use crate::matcher::pattern::CharSpecifier::{CharRange, SingleChar}; -use crate::matcher::pattern::MatchResult::{ - EntirePatternDoesntMatch, Match, SubPatternDoesntMatch, -}; -use crate::matcher::pattern::PatternToken::{ - AnyChar, AnyExcept, AnyPattern, AnyRecursiveSequence, AnySequence, AnyWithin, Char, -}; -use camino::Utf8Path; -use std::error::Error; -use std::str::FromStr; -use std::{fmt, path}; - -/// A pattern parsing error. -#[derive(Debug)] -#[expect(missing_copy_implementations)] -pub struct PatternError { - /// The approximate character index of where the error occurred. - pub pos: usize, - - /// A message describing the error. - pub msg: &'static str, -} - -impl Error for PatternError { - fn description(&self) -> &str { - self.msg - } -} - -impl fmt::Display for PatternError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "Pattern syntax error near position {}: {}", - self.pos, self.msg - ) - } -} - -/// A compiled Unix shell style pattern. -/// -/// - `?` matches any single character. -/// -/// - `*` matches any (possibly empty) sequence of characters. -/// -/// - `**` matches the current directory and arbitrary subdirectories. This -/// sequence **must** form a single path component, so both `**a` and `b**` -/// are invalid and will result in an error. A sequence of more than two -/// consecutive `*` characters is also invalid. -/// -/// - `[...]` matches any character inside the brackets. Character sequences -/// can also specify ranges of characters, as ordered by Unicode, so e.g. -/// `[0-9]` specifies any character between 0 and 9 inclusive. An unclosed -/// bracket is invalid. -/// -/// - `[!...]` is the negation of `[...]`, i.e. it matches any characters -/// **not** in the brackets. -/// -/// - The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets -/// (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then it -/// is interpreted as being part of, rather then ending, the character set, so -/// `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively. The `-` -/// character can be specified inside a character sequence pattern by placing -/// it at the start or the end, e.g. `[abc-]`. -/// -/// - `{...}` can be used to specify multiple patterns separated by commas. For -/// example, `a/{b,c}/d` will match `a/b/d` and `a/c/d`. -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] -pub struct Pattern { - /// The original glob pattern that was parsed to create this `Pattern`. - original: String, - tokens: Vec, - is_recursive: bool, - /// Did this pattern come from an `.editorconfig` file? - /// - /// TODO: Remove this flag and support `{a,b}` globs in Biome 2.0 - is_editorconfig: bool, -} - -/// Show the original glob pattern. 
-impl fmt::Display for Pattern { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.original.fmt(f) - } -} - -impl FromStr for Pattern { - type Err = PatternError; - - fn from_str(s: &str) -> Result { - Self::new(s) - } -} - -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] -enum PatternToken { - Char(char), - AnyChar, - AnySequence, - AnyRecursiveSequence, - AnyWithin(Vec), - AnyExcept(Vec), - /// A set of patterns that at least one of them must match - AnyPattern(Vec), -} - -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] -enum CharSpecifier { - SingleChar(char), - CharRange(char, char), -} - -#[derive(Copy, Clone, PartialEq)] -enum MatchResult { - Match, - SubPatternDoesntMatch, - EntirePatternDoesntMatch, -} - -const ERROR_WILDCARDS: &str = "wildcards are either regular `*` or recursive `**`"; -const ERROR_RECURSIVE_WILDCARDS: &str = "recursive wildcards must form a single path \ - component"; -const ERROR_INVALID_RANGE: &str = "invalid range pattern"; - -impl Pattern { - /// This function compiles Unix shell style patterns. - /// - /// An invalid glob pattern will yield a `PatternError`. - pub fn new(pattern: &str) -> Result { - Self::parse(pattern, false) - } - - /// This function compiles Unix shell style patterns. - /// - /// An invalid glob pattern will yield a `PatternError`. - pub fn parse(pattern: &str, is_editorconfig: bool) -> Result { - let chars = pattern.chars().collect::>(); - let mut tokens = Vec::new(); - let mut is_recursive = false; - let mut i = 0; - - // A pattern is relative if it starts with "." followed by a separator, - // eg. "./test" or ".\test" - let is_relative = matches!(chars.get(..2), Some(['.', sep]) if path::is_separator(*sep)); - if is_relative { - // If a pattern starts with a relative prefix, strip it from the - // pattern and replace it with a "**" sequence - i += 2; - tokens.push(AnyRecursiveSequence); - } else { - // A pattern is absolute if it starts with a path separator, eg. "/home" or "\\?\C:\Users" - let mut is_absolute = chars.first().is_some_and(|c| path::is_separator(*c)); - - // On windows a pattern may also be absolute if it starts with a - // drive letter, a colon and a separator, eg. "c:/Users" or "G:\Users" - if cfg!(windows) && !is_absolute { - is_absolute = matches!(chars.get(..3), Some(['a'..='z' | 'A'..='Z', ':', sep]) if path::is_separator(*sep)); - } - - // If a pattern is not absolute, insert a "**" sequence in front - if !is_absolute { - tokens.push(AnyRecursiveSequence); - } - } - - while i < chars.len() { - match chars[i] { - '?' => { - tokens.push(AnyChar); - i += 1; - } - '*' => { - let old = i; - - while i < chars.len() && chars[i] == '*' { - i += 1; - } - - let count = i - old; - - match count { - count if count > 2 => { - return Err(PatternError { - pos: old + 2, - msg: ERROR_WILDCARDS, - }); - } - count if count == 2 => { - // ** can only be an entire path component - // i.e. 
a/**/b is valid, but a**/b or a/**b is not - // invalid matches are treated literally - let is_valid = if i == 2 || path::is_separator(chars[i - count - 1]) { - // it ends in a '/' - if i < chars.len() && path::is_separator(chars[i]) { - i += 1; - true - // or the pattern ends here - // this enables the existing globbing mechanism - } else if i == chars.len() { - true - // `**` ends in non-separator - } else { - return Err(PatternError { - pos: i, - msg: ERROR_RECURSIVE_WILDCARDS, - }); - } - // `**` begins with non-separator - } else { - return Err(PatternError { - pos: old - 1, - msg: ERROR_RECURSIVE_WILDCARDS, - }); - }; - - if is_valid { - // collapse consecutive AnyRecursiveSequence to a - // single one - - let tokens_len = tokens.len(); - - if !(tokens_len > 1 - && tokens[tokens_len - 1] == AnyRecursiveSequence) - { - is_recursive = true; - tokens.push(AnyRecursiveSequence); - } - } - } - _ => { - tokens.push(AnySequence); - } - } - } - '[' => { - if i + 4 <= chars.len() && chars[i + 1] == '!' { - match chars[i + 3..].iter().position(|x| *x == ']') { - None => (), - Some(j) => { - let chars = &chars[i + 2..i + 3 + j]; - let cs = parse_char_specifiers(chars); - tokens.push(AnyExcept(cs)); - i += j + 4; - continue; - } - } - } else if i + 3 <= chars.len() && chars[i + 1] != '!' { - match chars[i + 2..].iter().position(|x| *x == ']') { - None => (), - Some(j) => { - let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]); - tokens.push(AnyWithin(cs)); - i += j + 3; - continue; - } - } - } - - // if we get here then this is not a valid range pattern - return Err(PatternError { - pos: i, - msg: ERROR_INVALID_RANGE, - }); - } - '{' if is_editorconfig => { - let mut depth = 1; - let mut j = i + 1; - while j < chars.len() { - match chars[j] { - '{' => depth += 1, - '}' => depth -= 1, - _ => (), - } - if depth > 1 { - return Err(PatternError { - pos: j, - msg: "nested '{' in '{...}' is not allowed", - }); - } - if depth == 0 { - break; - } - j += 1; - } - - if depth != 0 { - return Err(PatternError { - pos: i, - msg: "unmatched '{'", - }); - } - - let mut subpatterns = Vec::new(); - for subpattern in pattern[i + 1..j].split(',') { - let mut pattern = Pattern::new(subpattern)?; - // HACK: remove the leading '**' if it exists - if pattern.tokens.first() == Some(&PatternToken::AnyRecursiveSequence) { - pattern.tokens.remove(0); - } - subpatterns.push(pattern); - } - tokens.push(AnyPattern(subpatterns)); - i = j + 1; - } - c => { - tokens.push(Char(c)); - i += 1; - } - } - } - - Ok(Self { - tokens, - original: pattern.to_string(), - is_recursive, - is_editorconfig, - }) - } - - fn from_tokens(tokens: Vec, original: String, is_recursive: bool) -> Self { - Self { - tokens, - original, - is_recursive, - is_editorconfig: false, - } - } - - /// Escape metacharacters within the given string by surrounding them in - /// brackets. The resulting string will, when compiled into a `Pattern`, - /// match the input string and nothing else. - pub fn escape(s: &str) -> String { - let mut escaped = String::new(); - for c in s.chars() { - match c { - // note that ! does not need escaping because it is only special - // inside brackets - '?' | '*' | '[' | ']' => { - escaped.push('['); - escaped.push(c); - escaped.push(']'); - } - c => { - escaped.push(c); - } - } - } - escaped - } - - /// Return if the given `str` matches this `Pattern` using the default - /// match options (i.e. `MatchOptions::new()`). 
- /// - /// # Examples - /// - /// ```rust,ignore - /// use crate::Pattern; - /// - /// assert!(Pattern::new("c?t").unwrap().matches("cat")); - /// assert!(Pattern::new("k[!e]tteh").unwrap().matches("kitteh")); - /// assert!(Pattern::new("d*g").unwrap().matches("doog")); - /// ``` - pub fn matches(&self, str: &str) -> bool { - self.matches_with(str, MatchOptions::new()) - } - - /// Return if the given `Path`, when converted to a `str`, matches this - /// `Pattern` using the default match options (i.e. `MatchOptions::new()`). - pub fn matches_path(&self, path: &Utf8Path) -> bool { - self.matches(path.as_str()) - } - - /// Return if the given `str` matches this `Pattern` using the specified - /// match options. - pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool { - self.matches_from(true, str.chars(), 0, options) == Match - } - - /// Return if the given `Path`, when converted to a `str`, matches this - /// `Pattern` using the specified match options. - pub fn matches_path_with(&self, path: &Utf8Path, options: MatchOptions) -> bool { - self.matches_with(path.as_str(), options) - } - - /// Access the original glob pattern. - pub fn as_str(&self) -> &str { - &self.original - } - - fn matches_from( - &self, - mut follows_separator: bool, - mut file: std::str::Chars, - i: usize, - options: MatchOptions, - ) -> MatchResult { - for (ti, token) in self.tokens[i..].iter().enumerate() { - match token { - AnySequence | AnyRecursiveSequence => { - // ** must be at the start. - debug_assert!(match *token { - AnyRecursiveSequence => follows_separator, - _ => true, - }); - - // Empty match - match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) { - SubPatternDoesntMatch => (), // keep trying - m => return m, - }; - - while let Some(c) = file.next() { - if follows_separator && options.require_literal_leading_dot && c == '.' { - return SubPatternDoesntMatch; - } - follows_separator = path::is_separator(c); - match *token { - AnyRecursiveSequence if !follows_separator => continue, - AnySequence - if options.require_literal_separator && follows_separator => - { - return SubPatternDoesntMatch - } - _ => (), - } - match self.matches_from( - follows_separator, - file.clone(), - i + ti + 1, - options, - ) { - SubPatternDoesntMatch => (), // keep trying - m => return m, - } - } - } - AnyPattern(patterns) => { - for pattern in patterns.iter() { - let mut tokens = pattern.tokens.clone(); - tokens.extend_from_slice(&self.tokens[(i + ti + 1)..]); - let new_pattern = Pattern::from_tokens( - tokens, - pattern.original.clone(), - pattern.is_recursive, - ); - if new_pattern.matches_from(follows_separator, file.clone(), 0, options) - == Match - { - return Match; - } - } - return SubPatternDoesntMatch; - } - _ => { - let c = match file.next() { - Some(c) => c, - None => return EntirePatternDoesntMatch, - }; - - let is_sep = path::is_separator(c); - - if !match *token { - AnyChar | AnyWithin(..) | AnyExcept(..) - if (options.require_literal_separator && is_sep) - || (follows_separator - && options.require_literal_leading_dot - && c == '.') => - { - false - } - AnyChar => true, - AnyWithin(ref specifiers) => in_char_specifiers(specifiers, c, options), - AnyExcept(ref specifiers) => !in_char_specifiers(specifiers, c, options), - Char(c2) => chars_eq(c, c2, options.case_sensitive), - AnySequence | AnyRecursiveSequence | AnyPattern(_) => unreachable!(), - } { - return SubPatternDoesntMatch; - } - follows_separator = is_sep; - } - } - } - - // Iter is fused. 
- if file.next().is_none() { - Match - } else { - SubPatternDoesntMatch - } - } -} - -fn parse_char_specifiers(s: &[char]) -> Vec { - let mut cs = Vec::new(); - let mut i = 0; - while i < s.len() { - if i + 3 <= s.len() && s[i + 1] == '-' { - cs.push(CharRange(s[i], s[i + 2])); - i += 3; - } else { - cs.push(SingleChar(s[i])); - i += 1; - } - } - cs -} - -fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool { - for &specifier in specifiers.iter() { - match specifier { - SingleChar(sc) => { - if chars_eq(c, sc, options.case_sensitive) { - return true; - } - } - CharRange(start, end) => { - // FIXME: work with non-ascii chars properly (issue #1347) - if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() { - let start = start.to_ascii_lowercase(); - let end = end.to_ascii_lowercase(); - - let start_up = start.to_uppercase().next().unwrap(); - let end_up = end.to_uppercase().next().unwrap(); - - // only allow case insensitive matching when - // both start and end are within a-z or A-Z - if start != start_up && end != end_up { - let c = c.to_ascii_lowercase(); - if c >= start && c <= end { - return true; - } - } - } - - if c >= start && c <= end { - return true; - } - } - } - } - - false -} - -/// A helper function to determine if two chars are (possibly case-insensitively) equal. -fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool { - if cfg!(windows) && path::is_separator(a) && path::is_separator(b) { - true - } else if !case_sensitive && a.is_ascii() && b.is_ascii() { - // FIXME: work with non-ascii chars properly (issue #9084) - a.eq_ignore_ascii_case(&b) - } else { - a == b - } -} - -/// Configuration options to modify the behaviour of `Pattern::matches_with(..)`. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] -pub struct MatchOptions { - /// Whether or not patterns should be matched in a case-sensitive manner. - /// This currently only considers upper/lower case relationships between - /// ASCII characters, but in future this might be extended to work with - /// Unicode. - pub case_sensitive: bool, - - /// Whether or not path-component separator characters (e.g. `/` on - /// Posix) must be matched by a literal `/`, rather than by `*` or `?` or - /// `[...]`. - pub require_literal_separator: bool, - - /// Whether or not paths that contain components that start with a `.` - /// will require that `.` appears literally in the pattern; `*`, `?`, `**`, - /// or `[...]` will not match. This is useful because such files are - /// conventionally considered hidden on Unix systems and it might be - /// desirable to skip them when listing files. - pub require_literal_leading_dot: bool, -} - -impl MatchOptions { - /// Constructs a new `MatchOptions` with default field values. This is used - /// when calling functions that do not take an explicit `MatchOptions` - /// parameter. 
- /// - /// This function always returns this value: - /// - /// ```rust,ignore - /// MatchOptions { - /// case_sensitive: true, - /// require_literal_separator: false, - /// require_literal_leading_dot: false - /// } - /// ``` - pub fn new() -> Self { - Self { - case_sensitive: true, - require_literal_separator: false, - require_literal_leading_dot: false, - } - } -} - -#[cfg(test)] -mod test { - use super::{MatchOptions, Pattern}; - use camino::Utf8Path; - - #[test] - fn test_pattern_from_str() { - assert!("a*b".parse::().unwrap().matches("a_b")); - assert!("a/**b".parse::().unwrap_err().pos == 4); - } - - #[test] - fn test_wildcard_errors() { - assert!(Pattern::new("a/**b").unwrap_err().pos == 4); - assert!(Pattern::new("a/bc**").unwrap_err().pos == 3); - assert!(Pattern::new("a/*****").unwrap_err().pos == 4); - assert!(Pattern::new("a/b**c**d").unwrap_err().pos == 2); - assert!(Pattern::new("a**b").unwrap_err().pos == 0); - } - - #[test] - fn test_unclosed_bracket_errors() { - assert!(Pattern::new("abc[def").unwrap_err().pos == 3); - assert!(Pattern::new("abc[!def").unwrap_err().pos == 3); - assert!(Pattern::new("abc[").unwrap_err().pos == 3); - assert!(Pattern::new("abc[!").unwrap_err().pos == 3); - assert!(Pattern::new("abc[d").unwrap_err().pos == 3); - assert!(Pattern::new("abc[!d").unwrap_err().pos == 3); - assert!(Pattern::new("abc[]").unwrap_err().pos == 3); - assert!(Pattern::new("abc[!]").unwrap_err().pos == 3); - } - - #[test] - fn test_wildcards() { - assert!(Pattern::new("a*b").unwrap().matches("a_b")); - assert!(Pattern::new("a*b*c").unwrap().matches("abc")); - assert!(!Pattern::new("a*b*c").unwrap().matches("abcd")); - assert!(Pattern::new("a*b*c").unwrap().matches("a_b_c")); - assert!(Pattern::new("a*b*c").unwrap().matches("a___b___c")); - assert!(Pattern::new("abc*abc*abc") - .unwrap() - .matches("abcabcabcabcabcabcabc")); - assert!(!Pattern::new("abc*abc*abc") - .unwrap() - .matches("abcabcabcabcabcabcabca")); - assert!(Pattern::new("a*a*a*a*a*a*a*a*a") - .unwrap() - .matches("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); - assert!(Pattern::new("a*b[xyz]c*d").unwrap().matches("abxcdbxcddd")); - } - - #[test] - fn test_recursive_wildcards() { - let pat = Pattern::new("some/**/needle.txt").unwrap(); - assert!(pat.matches("some/needle.txt")); - assert!(pat.matches("some/one/needle.txt")); - assert!(pat.matches("some/one/two/needle.txt")); - assert!(pat.matches("some/other/needle.txt")); - assert!(!pat.matches("some/other/notthis.txt")); - - // a single ** should be valid, for globs - // Should accept anything - let pat = Pattern::new("**").unwrap(); - assert!(pat.is_recursive); - assert!(pat.matches("abcde")); - assert!(pat.matches("")); - assert!(pat.matches(".asdf")); - assert!(pat.matches("/x/.asdf")); - - // collapse consecutive wildcards - let pat = Pattern::new("some/**/**/needle.txt").unwrap(); - assert!(pat.matches("some/needle.txt")); - assert!(pat.matches("some/one/needle.txt")); - assert!(pat.matches("some/one/two/needle.txt")); - assert!(pat.matches("some/other/needle.txt")); - assert!(!pat.matches("some/other/notthis.txt")); - - // ** can begin the pattern - let pat = Pattern::new("**/test").unwrap(); - assert!(pat.matches("one/two/test")); - assert!(pat.matches("one/test")); - assert!(pat.matches("test")); - - // /** can begin the pattern - let pat = Pattern::new("/**/test").unwrap(); - assert!(pat.matches("/one/two/test")); - assert!(pat.matches("/one/test")); - assert!(pat.matches("/test")); - assert!(!pat.matches("/one/notthis")); - 
assert!(!pat.matches("/notthis")); - - // Only start sub-patterns on start of path segment. - let pat = Pattern::new("**/.*").unwrap(); - assert!(pat.matches(".abc")); - assert!(pat.matches("abc/.abc")); - assert!(!pat.matches("ab.c")); - assert!(!pat.matches("abc/ab.c")); - } - - #[test] - fn test_range_pattern() { - let pat = Pattern::new("a[0-9]b").unwrap(); - for i in 0..10 { - assert!(pat.matches(&format!("a{i}b"))); - } - assert!(!pat.matches("a_b")); - - let pat = Pattern::new("a[!0-9]b").unwrap(); - for i in 0..10 { - assert!(!pat.matches(&format!("a{i}b"))); - } - assert!(pat.matches("a_b")); - - let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"]; - for &p in pats.iter() { - let pat = Pattern::new(p).unwrap(); - for c in "abcdefghijklmnopqrstuvwxyz".chars() { - assert!(pat.matches(&c.to_string())); - } - for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() { - let options = MatchOptions { - case_sensitive: false, - ..MatchOptions::new() - }; - assert!(pat.matches_with(&c.to_string(), options)); - } - assert!(pat.matches("1")); - assert!(pat.matches("2")); - assert!(pat.matches("3")); - } - - let pats = ["[abc-]", "[-abc]", "[a-c-]"]; - for &p in pats.iter() { - let pat = Pattern::new(p).unwrap(); - assert!(pat.matches("a")); - assert!(pat.matches("b")); - assert!(pat.matches("c")); - assert!(pat.matches("-")); - assert!(!pat.matches("d")); - } - - let pat = Pattern::new("[2-1]").unwrap(); - assert!(!pat.matches("1")); - assert!(!pat.matches("2")); - - assert!(Pattern::new("[-]").unwrap().matches("-")); - assert!(!Pattern::new("[!-]").unwrap().matches("-")); - } - - #[test] - fn test_pattern_matches() { - let txt_pat = Pattern::new("*hello.txt").unwrap(); - assert!(txt_pat.matches("hello.txt")); - assert!(txt_pat.matches("gareth_says_hello.txt")); - assert!(txt_pat.matches("some/path/to/hello.txt")); - assert!(txt_pat.matches("some\\path\\to\\hello.txt")); - assert!(txt_pat.matches("/an/absolute/path/to/hello.txt")); - assert!(!txt_pat.matches("hello.txt-and-then-some")); - assert!(!txt_pat.matches("goodbye.txt")); - - let dir_pat = Pattern::new("*some/path/to/hello.txt").unwrap(); - assert!(dir_pat.matches("some/path/to/hello.txt")); - assert!(dir_pat.matches("a/bigger/some/path/to/hello.txt")); - assert!(!dir_pat.matches("some/path/to/hello.txt-and-then-some")); - assert!(!dir_pat.matches("some/other/path/to/hello.txt")); - } - - #[test] - fn test_pattern_escape() { - let s = "_[_]_?_*_!_"; - assert_eq!(Pattern::escape(s), "_[[]_[]]_[?]_[*]_!_".to_string()); - assert!(Pattern::new(&Pattern::escape(s)).unwrap().matches(s)); - } - - #[test] - fn test_pattern_matches_case_insensitive() { - let pat = Pattern::new("aBcDeFg").unwrap(); - let options = MatchOptions { - case_sensitive: false, - require_literal_separator: false, - require_literal_leading_dot: false, - }; - - assert!(pat.matches_with("aBcDeFg", options)); - assert!(pat.matches_with("abcdefg", options)); - assert!(pat.matches_with("ABCDEFG", options)); - assert!(pat.matches_with("AbCdEfG", options)); - } - - #[test] - fn test_pattern_matches_case_insensitive_range() { - let pat_within = Pattern::new("[a]").unwrap(); - let pat_except = Pattern::new("[!a]").unwrap(); - - let options_case_insensitive = MatchOptions { - case_sensitive: false, - require_literal_separator: false, - require_literal_leading_dot: false, - }; - let options_case_sensitive = MatchOptions { - case_sensitive: true, - require_literal_separator: false, - require_literal_leading_dot: false, - }; - - assert!(pat_within.matches_with("a", options_case_insensitive)); - 
assert!(pat_within.matches_with("A", options_case_insensitive)); - assert!(!pat_within.matches_with("A", options_case_sensitive)); - - assert!(!pat_except.matches_with("a", options_case_insensitive)); - assert!(!pat_except.matches_with("A", options_case_insensitive)); - assert!(pat_except.matches_with("A", options_case_sensitive)); - } - - #[test] - fn test_pattern_matches_require_literal_separator() { - let options_require_literal = MatchOptions { - case_sensitive: true, - require_literal_separator: true, - require_literal_leading_dot: false, - }; - let options_not_require_literal = MatchOptions { - case_sensitive: true, - require_literal_separator: false, - require_literal_leading_dot: false, - }; - - assert!(Pattern::new("abc/def") - .unwrap() - .matches_with("abc/def", options_require_literal)); - assert!(!Pattern::new("abc?def") - .unwrap() - .matches_with("abc/def", options_require_literal)); - assert!(!Pattern::new("abc*def") - .unwrap() - .matches_with("abc/def", options_require_literal)); - assert!(!Pattern::new("abc[/]def") - .unwrap() - .matches_with("abc/def", options_require_literal)); - - assert!(Pattern::new("abc/def") - .unwrap() - .matches_with("abc/def", options_not_require_literal)); - assert!(Pattern::new("abc?def") - .unwrap() - .matches_with("abc/def", options_not_require_literal)); - assert!(Pattern::new("abc*def") - .unwrap() - .matches_with("abc/def", options_not_require_literal)); - assert!(Pattern::new("abc[/]def") - .unwrap() - .matches_with("abc/def", options_not_require_literal)); - } - - #[test] - fn test_pattern_matches_require_literal_leading_dot() { - let options_require_literal_leading_dot = MatchOptions { - case_sensitive: true, - require_literal_separator: false, - require_literal_leading_dot: true, - }; - let options_not_require_literal_leading_dot = MatchOptions { - case_sensitive: true, - require_literal_separator: false, - require_literal_leading_dot: false, - }; - - let f = |options| { - Pattern::new("*.txt") - .unwrap() - .matches_with(".hello.txt", options) - }; - assert!(f(options_not_require_literal_leading_dot)); - assert!(!f(options_require_literal_leading_dot)); - - let f = |options| { - Pattern::new(".*.*") - .unwrap() - .matches_with(".hello.txt", options) - }; - assert!(f(options_not_require_literal_leading_dot)); - assert!(f(options_require_literal_leading_dot)); - - let f = |options| { - Pattern::new("aaa/bbb/*") - .unwrap() - .matches_with("aaa/bbb/.ccc", options) - }; - assert!(f(options_not_require_literal_leading_dot)); - assert!(!f(options_require_literal_leading_dot)); - - let f = |options| { - Pattern::new("aaa/bbb/*") - .unwrap() - .matches_with("aaa/bbb/c.c.c.", options) - }; - assert!(f(options_not_require_literal_leading_dot)); - assert!(f(options_require_literal_leading_dot)); - - let f = |options| { - Pattern::new("aaa/bbb/.*") - .unwrap() - .matches_with("aaa/bbb/.ccc", options) - }; - assert!(f(options_not_require_literal_leading_dot)); - assert!(f(options_require_literal_leading_dot)); - - let f = |options| { - Pattern::new("aaa/?bbb") - .unwrap() - .matches_with("aaa/.bbb", options) - }; - assert!(f(options_not_require_literal_leading_dot)); - assert!(!f(options_require_literal_leading_dot)); - - let f = |options| { - Pattern::new("aaa/[.]bbb") - .unwrap() - .matches_with("aaa/.bbb", options) - }; - assert!(f(options_not_require_literal_leading_dot)); - assert!(!f(options_require_literal_leading_dot)); - - let f = |options| Pattern::new("**/*").unwrap().matches_with(".bbb", options); - 
assert!(f(options_not_require_literal_leading_dot)); - assert!(!f(options_require_literal_leading_dot)); - } - - #[test] - fn test_matches_path() { - // on windows, (Utf8Path::new("a/b").as_str().unwrap() == "a\\b"), so this - // tests that / and \ are considered equivalent on windows - assert!(Pattern::new("a/b") - .unwrap() - .matches_path(Utf8Path::new("a/b"))); - } - - #[test] - fn test_path_join() { - let pattern = Utf8Path::new("one").join(Utf8Path::new("**/*.rs")); - assert!(Pattern::new(pattern.as_str()).is_ok()); - } - - #[test] - fn test_pattern_relative() { - assert!(Pattern::new("./b") - .unwrap() - .matches_path(Utf8Path::new("a/b"))); - assert!(Pattern::new("b") - .unwrap() - .matches_path(Utf8Path::new("a/b"))); - - if cfg!(windows) { - assert!(Pattern::new(".\\b") - .unwrap() - .matches_path(Utf8Path::new("a\\b"))); - assert!(Pattern::new("b") - .unwrap() - .matches_path(Utf8Path::new("a\\b"))); - } - } - - #[test] - fn test_pattern_absolute() { - assert!(Pattern::new("/a/b") - .unwrap() - .matches_path(Utf8Path::new("/a/b"))); - - if cfg!(windows) { - assert!(Pattern::new("c:/a/b") - .unwrap() - .matches_path(Utf8Path::new("c:/a/b"))); - assert!(Pattern::new("C:\\a\\b") - .unwrap() - .matches_path(Utf8Path::new("C:\\a\\b"))); - - assert!(Pattern::new("\\\\?\\c:\\a\\b") - .unwrap() - .matches_path(Utf8Path::new("\\\\?\\c:\\a\\b"))); - assert!(Pattern::new("\\\\?\\C:/a/b") - .unwrap() - .matches_path(Utf8Path::new("\\\\?\\C:/a/b"))); - } - } - - #[test] - fn test_pattern_glob() { - assert!(Pattern::new("*.js") - .unwrap() - .matches_path(Utf8Path::new("b/c.js"))); - - assert!(Pattern::new("**/*.js") - .unwrap() - .matches_path(Utf8Path::new("b/c.js"))); - - assert!(Pattern::new("*.js") - .unwrap() - .matches_path(Utf8Path::new("/a/b/c.js"))); - - assert!(Pattern::new("**/*.js") - .unwrap() - .matches_path(Utf8Path::new("/a/b/c.js"))); - - if cfg!(windows) { - assert!(Pattern::new("*.js") - .unwrap() - .matches_path(Utf8Path::new("C:\\a\\b\\c.js"))); - - assert!(Pattern::new("**/*.js") - .unwrap() - .matches_path(Utf8Path::new("\\\\?\\C:\\a\\b\\c.js"))); - } - } - - #[test] - fn test_pattern_glob_brackets() { - let pattern = Pattern::parse("{foo.js,bar.js}", true).unwrap(); - assert!(pattern.matches_path(Utf8Path::new("foo.js"))); - assert!(pattern.matches_path(Utf8Path::new("bar.js"))); - assert!(!pattern.matches_path(Utf8Path::new("baz.js"))); - - let pattern = Pattern::parse("{foo,bar}.js", true).unwrap(); - assert!(pattern.matches_path(Utf8Path::new("foo.js"))); - assert!(pattern.matches_path(Utf8Path::new("bar.js"))); - assert!(!pattern.matches_path(Utf8Path::new("baz.js"))); - - assert!(Pattern::parse("**/{foo,bar}.js", true) - .unwrap() - .matches_path(Utf8Path::new("a/b/foo.js"))); - - let pattern = Pattern::parse("src/{a/foo,bar}.js", true).unwrap(); - assert!(pattern.matches_path(Utf8Path::new("src/a/foo.js"))); - assert!(pattern.matches_path(Utf8Path::new("src/bar.js"))); - assert!(!pattern.matches_path(Utf8Path::new("src/a/b/foo.js"))); - assert!(!pattern.matches_path(Utf8Path::new("src/a/bar.js"))); - - let pattern = Pattern::parse("src/{a,b}/{c,d}/foo.js", true).unwrap(); - assert!(pattern.matches_path(Utf8Path::new("src/a/c/foo.js"))); - assert!(pattern.matches_path(Utf8Path::new("src/a/d/foo.js"))); - assert!(pattern.matches_path(Utf8Path::new("src/b/c/foo.js"))); - assert!(pattern.matches_path(Utf8Path::new("src/b/d/foo.js"))); - assert!(!pattern.matches_path(Utf8Path::new("src/bar/foo.js"))); - - let _ = Pattern::parse("{{foo,bar},baz}", true) - 
.expect_err("should not allow curly brackets more than 1 level deep"); - } - - #[test] - fn test_pattern_glob_brackets_not_available_by_default() { - // RODO: Remove this test when we make brackets available by default in Biome 2.0 - let pattern = Pattern::parse("{foo.js,bar.js}", false).unwrap(); - assert!(!pattern.matches_path(Utf8Path::new("foo.js"))); - assert!(!pattern.matches_path(Utf8Path::new("bar.js"))); - assert!(!pattern.matches_path(Utf8Path::new("baz.js"))); - } -} diff --git a/crates/biome_service/src/settings.rs b/crates/biome_service/src/settings.rs index 6bfc57be625f..791a89bf5f47 100644 --- a/crates/biome_service/src/settings.rs +++ b/crates/biome_service/src/settings.rs @@ -1,5 +1,5 @@ use crate::workspace::DocumentFileSource; -use crate::{Matcher, WorkspaceError}; +use crate::WorkspaceError; use biome_analyze::{AnalyzerOptions, AnalyzerRules, RuleDomain}; use biome_configuration::analyzer::assist::{Actions, AssistConfiguration, AssistEnabled}; use biome_configuration::analyzer::{LinterEnabled, RuleDomainValue}; @@ -15,7 +15,7 @@ use biome_configuration::{ CssConfiguration, FilesConfiguration, FilesIgnoreUnknownEnabled, FormatterConfiguration, GraphqlConfiguration, GritConfiguration, JsConfiguration, JsonConfiguration, LinterConfiguration, OverrideAssistConfiguration, OverrideFormatterConfiguration, - OverrideLinterConfiguration, Overrides, Rules, + OverrideGlobs, OverrideLinterConfiguration, Overrides, Rules, }; use biome_css_formatter::context::CssFormatOptions; use biome_css_parser::CssParserOptions; @@ -605,7 +605,6 @@ pub struct Includes { /// Otherwise this filtered out all files that doesn't match. globs: Option>, } - impl Includes { fn new( working_directory: Option, @@ -651,6 +650,42 @@ impl Includes { } } +#[derive(Clone, Default, Debug)] +pub struct OverrideIncludes { + /// This path is used to normalize the tested paths against [Self::globs]. + working_directory: Option, + /// If `None`, then all files are included + /// Otherwise this filtered out all files that doesn't match. + globs: Option, +} +impl OverrideIncludes { + pub fn new(working_directory: Option, globs: Option) -> Self { + Self { + working_directory, + globs, + } + } + + /// Returns `true` is no globs are set. + pub fn is_unset(&self) -> bool { + self.globs.is_none() + } + + /// Normalize `path` and match it against the list of globs. + pub fn matches(&self, path: &Utf8Path) -> bool { + let Some(globs) = self.globs.as_ref() else { + return true; + }; + let path = if let Some(working_directory) = &self.working_directory { + path.strip_prefix(working_directory).unwrap_or(path) + } else { + path + }; + let candidate_path = biome_glob::CandidatePath::new(path); + globs.is_match_candidate(&candidate_path) + } +} + fn to_file_settings( working_directory: Option, config: Option, @@ -1054,8 +1089,7 @@ impl OverrideSettings { #[derive(Clone, Debug, Default)] pub struct OverrideSettingPattern { - include: Matcher, - includes: Includes, + includes: OverrideIncludes, /// Formatter settings applied to all files in the workspaces pub formatter: OverrideFormatSettings, /// Linter settings applied to all files in the workspace @@ -1072,12 +1106,7 @@ impl OverrideSettingPattern { /// Note that only path to regular files should be passed. /// This function doesn't take directories into account. 
     pub fn is_file_included(&self, file_path: &Utf8Path) -> bool {
-        self.include.matches_path(file_path)
-            || if !self.includes.is_unset() {
-                self.includes.matches_with_exceptions(file_path)
-            } else {
-                false
-            }
+        !self.includes.is_unset() && self.includes.matches(file_path)
     }
 
     fn apply_overrides_to_js_format_options(&self, options: &mut JsFormatOptions) {
@@ -1353,8 +1382,7 @@ pub fn to_override_settings(
         languages.html = to_html_language_settings(html, &current_settings.languages.html);
 
         let pattern_setting = OverrideSettingPattern {
-            includes: Includes::new(working_directory.clone(), pattern.includes),
-            include: Matcher::from_globs(working_directory.clone(), pattern.include.as_deref())?,
+            includes: OverrideIncludes::new(working_directory.clone(), pattern.includes),
             formatter,
             linter,
             assist,
diff --git a/e2e-tests/relative-apth-ignore-file/test.sh b/e2e-tests/relative-apth-ignore-file/test.sh
deleted file mode 100755
index 75a053b3ed91..000000000000
--- a/e2e-tests/relative-apth-ignore-file/test.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-set -eu
-
-cargo run --bin biome -- lint .
diff --git a/e2e-tests/relative-apth-ignore-file/biome.json b/e2e-tests/relative-path-ignore-file/biome.json
similarity index 100%
rename from e2e-tests/relative-apth-ignore-file/biome.json
rename to e2e-tests/relative-path-ignore-file/biome.json
diff --git a/e2e-tests/relative-apth-ignore-file/file.js b/e2e-tests/relative-path-ignore-file/file.js
similarity index 100%
rename from e2e-tests/relative-apth-ignore-file/file.js
rename to e2e-tests/relative-path-ignore-file/file.js
diff --git a/e2e-tests/relative-path-ignore-file/test.sh b/e2e-tests/relative-path-ignore-file/test.sh
new file mode 100755
index 000000000000..c4f7a105abf4
--- /dev/null
+++ b/e2e-tests/relative-path-ignore-file/test.sh
@@ -0,0 +1,3 @@
+set -eu
+
+! cargo run --bin biome -- lint . 2>&1 | grep -q debugger
diff --git a/packages/@biomejs/backend-jsonrpc/src/workspace.ts b/packages/@biomejs/backend-jsonrpc/src/workspace.ts
index e8e1fb9c4f3e..d8e4b6d84f6e 100644
--- a/packages/@biomejs/backend-jsonrpc/src/workspace.ts
+++ b/packages/@biomejs/backend-jsonrpc/src/workspace.ts
@@ -760,14 +760,10 @@ export interface OverridePattern {
	 * Specific configuration for the GritQL language
	 */
	html?: HtmlConfiguration;
-	/**
-	 * A list of Unix shell style patterns. Biome will include files/folders that will match these patterns.
-	 */
-	include?: string[];
	/**
	 * A list of glob patterns. Biome will include files/folders that will match these patterns.
	 */
-	includes?: Glob[];
+	includes?: OverrideGlobs;
	/**
	 * Specific configuration for the JavaScript language
	 */
@@ -904,6 +900,7 @@ export interface OverrideFormatterConfiguration {
	 */
	lineWidth?: LineWidth;
}
+export type OverrideGlobs = Glob[];
export interface OverrideLinterConfiguration {
	/**
	 * List of rules
diff --git a/packages/@biomejs/biome/configuration_schema.json b/packages/@biomejs/biome/configuration_schema.json
index 4e01f374f9b9..b667776b902a 100644
--- a/packages/@biomejs/biome/configuration_schema.json
+++ b/packages/@biomejs/biome/configuration_schema.json
@@ -2981,6 +2981,10 @@
		},
		"additionalProperties": false
	},
+	"OverrideGlobs": {
+		"type": "array",
+		"items": { "$ref": "#/definitions/Glob" }
+	},
	"OverrideLinterConfiguration": {
		"type": "object",
		"properties": {
@@ -3045,15 +3049,12 @@
			{ "type": "null" }
		]
	},
-	"include": {
-		"description": "A list of Unix shell style patterns. 
Biome will include files/folders that will match these patterns.", - "type": ["array", "null"], - "items": { "type": "string" } - }, "includes": { "description": "A list of glob patterns. Biome will include files/folders that will match these patterns.", - "type": ["array", "null"], - "items": { "$ref": "#/definitions/Glob" } + "anyOf": [ + { "$ref": "#/definitions/OverrideGlobs" }, + { "type": "null" } + ] }, "javascript": { "description": "Specific configuration for the JavaScript language",
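
For context on the schema change above, a minimal sketch of an override that uses the new `includes` shape in a biome.json. This example is illustrative only and not part of the patch; the globs and the rule shown here are hypothetical:

    {
      "overrides": [
        {
          "includes": ["src/**/*.js", "!src/**/*.test.js"],
          "linter": { "rules": { "suspicious": { "noDebugger": "off" } } }
        }
      ]
    }

With this shape, negated globs (prefixed with `!`) carve exceptions out of the matched set, replacing the separate `include` list that this patch removes.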