Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add html parser crate #2163

Merged
merged 5 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ biome_graphql_syntax = { version = "0.1.0", path = "./crates/biome_graph
biome_grit_factory = { version = "0.5.7", path = "./crates/biome_grit_factory" }
biome_grit_parser = { version = "0.1.0", path = "./crates/biome_grit_parser" }
biome_grit_syntax = { version = "0.5.7", path = "./crates/biome_grit_syntax" }
biome_html_factory = { version = "0.5.7", path = "./crates/biome_html_factory" }
biome_html_syntax = { version = "0.5.7", path = "./crates/biome_html_syntax" }
biome_js_analyze = { version = "0.5.7", path = "./crates/biome_js_analyze" }
biome_js_factory = { version = "0.5.7", path = "./crates/biome_js_factory" }
Expand Down
4 changes: 0 additions & 4 deletions crates/biome_css_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,5 @@ quickcheck = { workspace = true }
quickcheck_macros = { workspace = true }
tests_macros = { path = "../tests_macros" }

# cargo-workspaces metadata
[package.metadata.workspaces]
independent = true

[lints]
workspace = true
96 changes: 49 additions & 47 deletions crates/biome_css_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ mod tests;
use crate::CssParserOptions;
use biome_css_syntax::{CssSyntaxKind, CssSyntaxKind::*, TextLen, TextSize, T};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{LexContext, Lexer, LexerCheckpoint, TokenFlags};
use biome_parser::lexer::{
LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags,
};
use biome_unicode_table::{
is_css_id_continue, is_css_id_start, lookup_byte, Dispatch, Dispatch::*,
};
Expand Down Expand Up @@ -78,6 +80,9 @@ pub(crate) struct CssLexer<'src> {
}

impl<'src> Lexer<'src> for CssLexer<'src> {
const NEWLINE: Self::Kind = NEWLINE;

const WHITESPACE: Self::Kind = WHITESPACE;
type Kind = CssSyntaxKind;
type LexContext = CssLexContext;
type ReLexContext = CssReLexContext;
Expand All @@ -102,18 +107,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
self.diagnostics.push(diagnostic);
}

fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
LexerCheckpoint {
position: TextSize::from(self.position as u32),
current_start: self.current_start,
current_flags: self.current_flags,
current_kind: self.current_kind,
after_line_break: self.after_newline,
unicode_bom_length: self.unicode_bom_length,
diagnostics_pos: self.diagnostics.len() as u32,
}
}

fn next_token(&mut self, context: Self::LexContext) -> Self::Kind {
self.current_start = self.text_position();
self.current_flags = TokenFlags::empty();
Expand All @@ -140,25 +133,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
kind
}

fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
let old_position = self.position;
self.position = u32::from(self.current_start) as usize;

let re_lexed_kind = match self.current_byte() {
Some(current) => self.consume_selector_token(current),
None => EOF,
};

if self.current() == re_lexed_kind {
// Didn't re-lex anything. Return existing token again
self.position = old_position;
} else {
self.current_kind = re_lexed_kind;
}

re_lexed_kind
}

fn has_preceding_line_break(&self) -> bool {
self.current_flags.has_preceding_line_break()
}
Expand Down Expand Up @@ -197,20 +171,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
self.current_flags
}

/// Consume one newline or all whitespace until a non-whitespace or a newline is found.
///
/// ## Safety
/// Must be called at a valid UTF-8 char boundary
fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
if self.consume_newline() {
self.after_newline = true;
NEWLINE
} else {
self.consume_whitespaces();
WHITESPACE
}
}

#[inline]
fn advance_char_unchecked(&mut self) {
let c = self.current_char_unchecked();
Expand Down Expand Up @@ -314,7 +274,13 @@ impl<'src> CssLexer<'src> {
let dispatched = lookup_byte(current);

match dispatched {
WHS => self.consume_newline_or_whitespaces(),
WHS => {
let kind = self.consume_newline_or_whitespaces();
if kind == Self::NEWLINE {
self.after_newline = true;
}
kind
}
QOT => self.consume_string_literal(current),
SLH => self.consume_slash(),

Expand Down Expand Up @@ -1268,6 +1234,42 @@ impl<'src> CssLexer<'src> {
}
}
}

impl<'src> ReLexer<'src> for CssLexer<'src> {
fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
let old_position = self.position;
self.position = u32::from(self.current_start) as usize;

let re_lexed_kind = match self.current_byte() {
Some(current) => self.consume_selector_token(current),
None => EOF,
};

if self.current() == re_lexed_kind {
// Didn't re-lex anything. Return existing token again
self.position = old_position;
} else {
self.current_kind = re_lexed_kind;
}

re_lexed_kind
}
}

impl<'src> LexerWithCheckpoint<'src> for CssLexer<'src> {
fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
LexerCheckpoint {
position: TextSize::from(self.position as u32),
current_start: self.current_start,
current_flags: self.current_flags,
current_kind: self.current_kind,
after_line_break: self.after_newline,
unicode_bom_length: self.unicode_bom_length,
diagnostics_pos: self.diagnostics.len() as u32,
}
}
}

#[derive(Copy, Clone, Debug)]
enum LexStringState {
/// String that contains an invalid escape sequence
Expand Down
21 changes: 2 additions & 19 deletions crates/biome_grit_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub(crate) struct GritLexer<'src> {
}

impl<'src> Lexer<'src> for GritLexer<'src> {
const NEWLINE: Self::Kind = NEWLINE;
const WHITESPACE: Self::Kind = WHITESPACE;
type Kind = GritSyntaxKind;

type LexContext = ();
Expand All @@ -51,10 +53,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {
self.current_start
}

fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
unimplemented!("Grit lexer doesn't support checkpoints");
}

fn next_token(&mut self, _context: Self::LexContext) -> Self::Kind {
self.current_start = self.text_position();

Expand All @@ -79,11 +77,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {

kind
}

fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
unimplemented!("Grit lexer doesn't support re-lexing");
}

fn has_preceding_line_break(&self) -> bool {
self.after_newline
}
Expand All @@ -108,16 +101,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {
self.diagnostics.push(diagnostic);
}

fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
if self.consume_newline() {
self.after_newline = true;
NEWLINE
} else {
self.consume_whitespaces();
WHITESPACE
}
}

#[inline]
fn advance_char_unchecked(&mut self) {
let c = self.current_char_unchecked();
Expand Down
4 changes: 2 additions & 2 deletions crates/biome_grit_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ impl<'source> Parser for GritParser<'source> {
}
}

pub(crate) fn parse_root(p: &mut GritParser) -> CompletedMarker {
pub(crate) fn parse_root(p: &mut GritParser) {
let m = p.start();

p.eat(UNICODE_BOM);
Expand All @@ -90,7 +90,7 @@ pub(crate) fn parse_root(p: &mut GritParser) -> CompletedMarker {

p.expect(EOF);

m.complete(p, GRIT_ROOT)
m.complete(p, GRIT_ROOT);
}

fn parse_version(p: &mut GritParser) -> ParsedSyntax {
Expand Down
96 changes: 74 additions & 22 deletions crates/biome_html_factory/src/generated/node_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading