Commit 6d1566d: address feedback

ematipico committed Mar 28, 2024 (1 parent: 8385e1a)
Showing 18 changed files with 416 additions and 108 deletions.
crates/biome_grit_parser/src/lexer/mod.rs (2 additions, 19 deletions)
@@ -34,6 +34,8 @@ pub(crate) struct GritLexer<'src> {
 }
 
 impl<'src> Lexer<'src> for GritLexer<'src> {
+    const NEWLINE: Self::Kind = NEWLINE;
+    const WHITESPACE: Self::Kind = WHITESPACE;
     type Kind = GritSyntaxKind;
 
     type LexContext = ();

@@ -51,10 +53,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {
         self.current_start
     }
 
-    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
-        unimplemented!("Grit lexer doesn't support checkpoints");
-    }
-
     fn next_token(&mut self, _context: Self::LexContext) -> Self::Kind {
         self.current_start = self.text_position();

@@ -79,11 +77,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {

         kind
     }
 
-    fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
-        unimplemented!("Grit lexer doesn't support re-lexing");
-    }
-
     fn has_preceding_line_break(&self) -> bool {
         self.after_newline
     }

@@ -108,16 +101,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {
         self.diagnostics.push(diagnostic);
     }
 
-    fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
-        if self.consume_newline() {
-            self.after_newline = true;
-            NEWLINE
-        } else {
-            self.consume_whitespaces();
-            WHITESPACE
-        }
-    }
-
     #[inline]
     fn advance_char_unchecked(&mut self) {
         let c = self.current_char_unchecked();
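Note on the removals above: checkpoint and re_lex were unimplemented stubs, and consume_newline_or_whitespaces was per-lexer boilerplate. The two new associated constants suggest the whitespace/newline logic now lives as a default method on the Lexer trait, parameterized by each lexer's NEWLINE and WHITESPACE kinds. A minimal sketch of such a default, reconstructed from the deleted body; the trait shape and helper names here are assumptions, not the actual biome_parser API:

    // Sketch only: a possible shared default on the Lexer trait.
    pub trait Lexer<'src> {
        type Kind;

        /// Token kind this lexer emits for a newline.
        const NEWLINE: Self::Kind;
        /// Token kind this lexer emits for a run of whitespace.
        const WHITESPACE: Self::Kind;

        /// Consumes one newline sequence; false if not at a newline.
        fn consume_newline(&mut self) -> bool;
        /// Consumes a run of non-newline whitespace.
        fn consume_whitespaces(&mut self);

        /// Shared default mirroring the body deleted from GritLexer.
        /// (The Grit version also set `after_newline`; that state is
        /// presumably tracked elsewhere now.)
        fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
            if self.consume_newline() {
                Self::NEWLINE
            } else {
                self.consume_whitespaces();
                Self::WHITESPACE
            }
        }
    }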
crates/biome_grit_parser/src/parser/mod.rs (2 additions, 2 deletions)
@@ -78,7 +78,7 @@ impl<'source> Parser for GritParser<'source> {
     }
 }
 
-pub(crate) fn parse_root(p: &mut GritParser) -> CompletedMarker {
+pub(crate) fn parse_root(p: &mut GritParser) {
     let m = p.start();
 
     p.eat(UNICODE_BOM);

@@ -90,7 +90,7 @@ pub(crate) fn parse_root(p: &mut GritParser) -> CompletedMarker {

     p.expect(EOF);
 
-    m.complete(p, GRIT_ROOT)
+    m.complete(p, GRIT_ROOT);
 }
 
 fn parse_version(p: &mut GritParser) -> ParsedSyntax {
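For context on the marker calls in parse_root: p.start() opens a marker, the rule parses its children, and m.complete(p, KIND) wraps everything consumed since the start into a node of that kind, returning a CompletedMarker that this commit now discards. A schematic sketch of the pattern with stand-in types, not Biome's real API:

    // Stand-in types illustrating the start/complete marker pattern.
    struct Marker {
        start: usize,
    }

    struct Parser {
        pos: usize,
        nodes: Vec<(String, usize, usize)>, // (kind, start, end)
    }

    impl Parser {
        fn start(&mut self) -> Marker {
            Marker { start: self.pos }
        }
    }

    impl Marker {
        /// Wraps every token consumed since `start` into a `kind` node.
        fn complete(self, p: &mut Parser, kind: &str) {
            p.nodes.push((kind.to_string(), self.start, p.pos));
        }
    }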
crates/biome_html_factory/src/generated/node_factory.rs (74 additions, 22 deletions)

Generated file; diff not rendered.

crates/biome_html_factory/src/generated/syntax_factory.rs (31 additions, 3 deletions)

Generated file; diff not rendered.

crates/biome_html_parser/src/lexer/mod.rs (23 additions, 10 deletions)
@@ -1,9 +1,11 @@
-use biome_html_syntax::HtmlSyntaxKind::{EOF, ERROR_TOKEN, NEWLINE, TOMBSTONE, WHITESPACE};
-use biome_html_syntax::{HtmlSyntaxKind, TextLen, TextSize};
+mod tests;
+
+use biome_html_syntax::HtmlSyntaxKind::{
+    EOF, ERROR_TOKEN, NEWLINE, TOMBSTONE, UNICODE_BOM, WHITESPACE,
+};
+use biome_html_syntax::{HtmlSyntaxKind, TextLen, TextSize, T};
 use biome_parser::diagnostic::ParseDiagnostic;
 use biome_parser::lexer::{Lexer, LexerCheckpoint, TokenFlags};
 use biome_unicode_table::lookup_byte;
 use biome_unicode_table::Dispatch::*;
 pub(crate) struct HtmlLexer<'src> {
     /// Source text

@@ -41,15 +43,26 @@ impl<'src> HtmlLexer<'src> {

 impl<'src> HtmlLexer<'src> {
     fn consume_token(&mut self, current: u8) -> HtmlSyntaxKind {
-        let dispatched = lookup_byte(current);
-
-        match dispatched {
-            WHS => self.consume_newline_or_whitespaces(),
+        match current {
+            b'\n' | b'\r' | b'\t' | b' ' => self.consume_newline_or_whitespaces(),
+            b'<' => self.consume_byte(T![<]),
+            b'>' => self.consume_byte(T![>]),
+            b'/' => self.consume_byte(T![/]),
+            b'!' => self.consume_byte(T![!]),
+            _ if self.position == 0 && self.consume_potential_bom(UNICODE_BOM).is_some() => {
+                UNICODE_BOM
+            }
             _ => self.consume_unexpected_character(),
         }
     }
 
+    /// Bumps the current byte and creates a lexed token of the passed in kind.
+    #[inline]
+    fn consume_byte(&mut self, tok: HtmlSyntaxKind) -> HtmlSyntaxKind {
+        self.advance(1);
+        tok
+    }
+
     fn consume_unexpected_character(&mut self) -> HtmlSyntaxKind {
         self.assert_at_char_boundary();
@@ -79,7 +92,7 @@ impl<'src> Lexer<'src> for HtmlLexer<'src> {
     type ReLexContext = ();
 
     fn source(&self) -> &'src str {
-        &self.source
+        self.source
     }
 
     fn current(&self) -> Self::Kind {
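The source() change is subtle but real: self.source copies the stored &'src str out of the struct, so the returned slice borrows from the source text itself, while &self.source would borrow through &self and add a needless level of indirection. A self-contained illustration; the names are illustrative, not the real HtmlLexer:

    // Returning the stored slice keeps the full 'src lifetime.
    struct Lexer<'src> {
        source: &'src str,
    }

    impl<'src> Lexer<'src> {
        fn source(&self) -> &'src str {
            // Copies the `&'src str` out; the result can outlive
            // this borrow of `self`.
            self.source
        }
    }

    fn main() {
        let text = String::from("<div>");
        let slice = {
            let lexer = Lexer { source: &text };
            lexer.source()
            // `lexer` is dropped here, but `slice` stays valid:
            // it borrows from `text`, not from `lexer`.
        };
        assert_eq!(slice, "<div>");
    }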
crates/biome_html_parser/src/lexer/tests.rs (new file, 88 additions)
@@ -0,0 +1,88 @@
#![cfg(test)]
#![allow(unused_mut, unused_variables, unused_assignments)]

use super::{HtmlLexer, TextSize};
use biome_html_syntax::HtmlSyntaxKind::{self, *};
use biome_parser::lexer::Lexer;
use biome_rowan::TextRange;

pub struct Token {
kind: HtmlSyntaxKind,
range: TextRange,
}

impl Iterator for HtmlLexer<'_> {
type Item = Token;

fn next(&mut self) -> Option<Self::Item> {
let kind = self.next_token(());
if kind == EOF {
None
} else {
Some(Token {
kind,
range: self.current_range(),
})
}
}
}

// Assert the result of lexing a piece of source code,
// and make sure the tokens yielded are fully lossless and the source can be reconstructed from only the tokens
macro_rules! assert_lex {
($src:expr, $($kind:ident:$len:expr $(,)?)*) => {{
let mut lexer = HtmlLexer::from_str($src);
let mut idx = 0;
let mut tok_idx = TextSize::default();

let mut new_str = String::with_capacity($src.len());
let tokens: Vec<_> = lexer.collect();

$(
assert_eq!(
tokens[idx].kind,
HtmlSyntaxKind::$kind,
"expected token kind {}, but found {:?}",
stringify!($kind),
tokens[idx].kind,
);

assert_eq!(
tokens[idx].range.len(),
TextSize::from($len),
"expected token length of {}, but found {:?} for token {:?}",
$len,
tokens[idx].range.len(),
tokens[idx].kind,
);

new_str.push_str(&$src[tokens[idx].range]);
tok_idx += tokens[idx].range.len();

idx += 1;
)*

if idx < tokens.len() {
panic!(
"expected {} tokens but lexer returned {}, first unexpected token is '{:?}'",
idx,
tokens.len(),
tokens[idx].kind
);
} else {
assert_eq!(idx, tokens.len());
}

assert_eq!($src, new_str, "Failed to reconstruct input");
}};
}

// TODO: doctype lexing is not handled yet.
#[test]
#[ignore = "currently not handled"]
fn doctype() {
assert_lex! {
"doctype",
ERROR_TOKEN:1,
}
}
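Given the single-byte tokens added to consume_token, a natural next case for this suite is the punctuation kinds. A sketch, assuming T![<] and T![>] map to kinds named L_ANGLE and R_ANGLE as in Biome's other syntax crates:

    // Hypothetical test; L_ANGLE/R_ANGLE are assumed kind names.
    #[test]
    fn angle_brackets() {
        assert_lex! {
            "< >",
            L_ANGLE: 1,
            WHITESPACE: 1,
            R_ANGLE: 1,
        }
    }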