Skip to content

Commit

Permalink
chore: add html parser crate (#2163)
Browse files Browse the repository at this point in the history
  • Loading branch information
ematipico authored Mar 29, 2024
1 parent 5e219db commit 66a99f6
Show file tree
Hide file tree
Showing 28 changed files with 1,172 additions and 204 deletions.
17 changes: 17 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ biome_graphql_syntax = { version = "0.1.0", path = "./crates/biome_graph
biome_grit_factory = { version = "0.5.7", path = "./crates/biome_grit_factory" }
biome_grit_parser = { version = "0.1.0", path = "./crates/biome_grit_parser" }
biome_grit_syntax = { version = "0.5.7", path = "./crates/biome_grit_syntax" }
biome_html_factory = { version = "0.5.7", path = "./crates/biome_html_factory" }
biome_html_syntax = { version = "0.5.7", path = "./crates/biome_html_syntax" }
biome_js_analyze = { version = "0.5.7", path = "./crates/biome_js_analyze" }
biome_js_factory = { version = "0.5.7", path = "./crates/biome_js_factory" }
Expand Down
4 changes: 0 additions & 4 deletions crates/biome_css_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,5 @@ quickcheck = { workspace = true }
quickcheck_macros = { workspace = true }
tests_macros = { path = "../tests_macros" }

# cargo-workspaces metadata
[package.metadata.workspaces]
independent = true

[lints]
workspace = true
96 changes: 49 additions & 47 deletions crates/biome_css_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ mod tests;
use crate::CssParserOptions;
use biome_css_syntax::{CssSyntaxKind, CssSyntaxKind::*, TextLen, TextSize, T};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{LexContext, Lexer, LexerCheckpoint, TokenFlags};
use biome_parser::lexer::{
LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags,
};
use biome_unicode_table::{
is_css_id_continue, is_css_id_start, lookup_byte, Dispatch, Dispatch::*,
};
Expand Down Expand Up @@ -78,6 +80,9 @@ pub(crate) struct CssLexer<'src> {
}

impl<'src> Lexer<'src> for CssLexer<'src> {
const NEWLINE: Self::Kind = NEWLINE;

const WHITESPACE: Self::Kind = WHITESPACE;
type Kind = CssSyntaxKind;
type LexContext = CssLexContext;
type ReLexContext = CssReLexContext;
Expand All @@ -102,18 +107,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
self.diagnostics.push(diagnostic);
}

fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
LexerCheckpoint {
position: TextSize::from(self.position as u32),
current_start: self.current_start,
current_flags: self.current_flags,
current_kind: self.current_kind,
after_line_break: self.after_newline,
unicode_bom_length: self.unicode_bom_length,
diagnostics_pos: self.diagnostics.len() as u32,
}
}

fn next_token(&mut self, context: Self::LexContext) -> Self::Kind {
self.current_start = self.text_position();
self.current_flags = TokenFlags::empty();
Expand All @@ -140,25 +133,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
kind
}

fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
let old_position = self.position;
self.position = u32::from(self.current_start) as usize;

let re_lexed_kind = match self.current_byte() {
Some(current) => self.consume_selector_token(current),
None => EOF,
};

if self.current() == re_lexed_kind {
// Didn't re-lex anything. Return existing token again
self.position = old_position;
} else {
self.current_kind = re_lexed_kind;
}

re_lexed_kind
}

fn has_preceding_line_break(&self) -> bool {
self.current_flags.has_preceding_line_break()
}
Expand Down Expand Up @@ -197,20 +171,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
self.current_flags
}

/// Consume one newline or all whitespace until a non-whitespace or a newline is found.
///
/// ## Safety
/// Must be called at a valid UTF-8 char boundary
fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
if self.consume_newline() {
self.after_newline = true;
NEWLINE
} else {
self.consume_whitespaces();
WHITESPACE
}
}

#[inline]
fn advance_char_unchecked(&mut self) {
let c = self.current_char_unchecked();
Expand Down Expand Up @@ -314,7 +274,13 @@ impl<'src> CssLexer<'src> {
let dispatched = lookup_byte(current);

match dispatched {
WHS => self.consume_newline_or_whitespaces(),
WHS => {
let kind = self.consume_newline_or_whitespaces();
if kind == Self::NEWLINE {
self.after_newline = true;
}
kind
}
QOT => self.consume_string_literal(current),
SLH => self.consume_slash(),

Expand Down Expand Up @@ -1268,6 +1234,42 @@ impl<'src> CssLexer<'src> {
}
}
}

impl<'src> ReLexer<'src> for CssLexer<'src> {
    /// Re-lexes the current token, re-interpreting it as a selector token.
    ///
    /// Returns the re-lexed kind. If re-lexing yields the very same kind the
    /// lexer already had, the lexer state is left completely untouched.
    fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
        // Remember how far lexing had progressed so we can roll back.
        let saved_position = self.position;
        // Rewind to the start of the current token before lexing it again.
        self.position = u32::from(self.current_start) as usize;

        let relexed = if let Some(byte) = self.current_byte() {
            self.consume_selector_token(byte)
        } else {
            EOF
        };

        if relexed == self.current() {
            // Didn't re-lex anything. Restore the old position so the
            // existing token is returned again unchanged.
            self.position = saved_position;
        } else {
            self.current_kind = relexed;
        }

        relexed
    }
}

impl<'src> LexerWithCheckpoint<'src> for CssLexer<'src> {
    /// Snapshots the lexer's current state so the parser can later rewind
    /// to this exact point (token boundaries, flags, and diagnostics count).
    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
        // Byte offset of the lexer cursor, converted into a `TextSize`.
        let position = TextSize::from(self.position as u32);
        // Number of diagnostics emitted so far; rewinding truncates to this.
        let diagnostics_pos = self.diagnostics.len() as u32;

        LexerCheckpoint {
            position,
            diagnostics_pos,
            current_start: self.current_start,
            current_kind: self.current_kind,
            current_flags: self.current_flags,
            after_line_break: self.after_newline,
            unicode_bom_length: self.unicode_bom_length,
        }
    }
}

#[derive(Copy, Clone, Debug)]
enum LexStringState {
/// String that contains an invalid escape sequence
Expand Down
21 changes: 2 additions & 19 deletions crates/biome_grit_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub(crate) struct GritLexer<'src> {
}

impl<'src> Lexer<'src> for GritLexer<'src> {
const NEWLINE: Self::Kind = NEWLINE;
const WHITESPACE: Self::Kind = WHITESPACE;
type Kind = GritSyntaxKind;

type LexContext = ();
Expand All @@ -51,10 +53,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {
self.current_start
}

fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
unimplemented!("Grit lexer doesn't support checkpoints");
}

fn next_token(&mut self, _context: Self::LexContext) -> Self::Kind {
self.current_start = self.text_position();

Expand All @@ -79,11 +77,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {

kind
}

fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
unimplemented!("Grit lexer doesn't support re-lexing");
}

fn has_preceding_line_break(&self) -> bool {
self.after_newline
}
Expand All @@ -108,16 +101,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {
self.diagnostics.push(diagnostic);
}

fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
if self.consume_newline() {
self.after_newline = true;
NEWLINE
} else {
self.consume_whitespaces();
WHITESPACE
}
}

#[inline]
fn advance_char_unchecked(&mut self) {
let c = self.current_char_unchecked();
Expand Down
4 changes: 2 additions & 2 deletions crates/biome_grit_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ impl<'source> Parser for GritParser<'source> {
}
}

pub(crate) fn parse_root(p: &mut GritParser) -> CompletedMarker {
pub(crate) fn parse_root(p: &mut GritParser) {
let m = p.start();

p.eat(UNICODE_BOM);
Expand All @@ -90,7 +90,7 @@ pub(crate) fn parse_root(p: &mut GritParser) -> CompletedMarker {

p.expect(EOF);

m.complete(p, GRIT_ROOT)
m.complete(p, GRIT_ROOT);
}

fn parse_version(p: &mut GritParser) -> ParsedSyntax {
Expand Down
96 changes: 74 additions & 22 deletions crates/biome_html_factory/src/generated/node_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 66a99f6

Please sign in to comment.