diff --git a/Cargo.lock b/Cargo.lock
index 4407504f2a34..6c4e21af5ca2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -482,6 +482,23 @@ dependencies = [
  "biome_rowan",
 ]

+[[package]]
+name = "biome_html_parser"
+version = "0.0.0"
+dependencies = [
+ "biome_console",
+ "biome_diagnostics",
+ "biome_html_factory",
+ "biome_html_syntax",
+ "biome_parser",
+ "biome_rowan",
+ "biome_test_utils",
+ "biome_unicode_table",
+ "insta",
+ "tests_macros",
+ "tracing",
+]
+
 [[package]]
 name = "biome_html_syntax"
 version = "0.5.7"
diff --git a/Cargo.toml b/Cargo.toml
index 3b4c55e50eb6..c9486eea4719 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -116,6 +116,7 @@ biome_graphql_syntax = { version = "0.1.0", path = "./crates/biome_graph
 biome_grit_factory = { version = "0.5.7", path = "./crates/biome_grit_factory" }
 biome_grit_parser = { version = "0.1.0", path = "./crates/biome_grit_parser" }
 biome_grit_syntax = { version = "0.5.7", path = "./crates/biome_grit_syntax" }
+biome_html_factory = { version = "0.5.7", path = "./crates/biome_html_factory" }
 biome_html_syntax = { version = "0.5.7", path = "./crates/biome_html_syntax" }
 biome_js_analyze = { version = "0.5.7", path = "./crates/biome_js_analyze" }
 biome_js_factory = { version = "0.5.7", path = "./crates/biome_js_factory" }
diff --git a/crates/biome_css_parser/Cargo.toml b/crates/biome_css_parser/Cargo.toml
index 762b3cf75038..5499aebcd3a5 100644
--- a/crates/biome_css_parser/Cargo.toml
+++ b/crates/biome_css_parser/Cargo.toml
@@ -27,9 +27,5 @@ quickcheck = { workspace = true }
 quickcheck_macros = { workspace = true }
 tests_macros = { path = "../tests_macros" }

-# cargo-workspaces metadata
-[package.metadata.workspaces]
-independent = true
-
 [lints]
 workspace = true
diff --git a/crates/biome_css_parser/src/lexer/mod.rs b/crates/biome_css_parser/src/lexer/mod.rs
index 3d747472d650..e6631ed9e8d2 100644
--- a/crates/biome_css_parser/src/lexer/mod.rs
+++ b/crates/biome_css_parser/src/lexer/mod.rs
@@ -5,7 +5,9 @@ mod tests;
 use crate::CssParserOptions;
 use biome_css_syntax::{CssSyntaxKind, CssSyntaxKind::*, TextLen, TextSize, T};
 use biome_parser::diagnostic::ParseDiagnostic;
-use biome_parser::lexer::{LexContext, Lexer, LexerCheckpoint, TokenFlags};
+use biome_parser::lexer::{
+    LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags,
+};
 use biome_unicode_table::{
     is_css_id_continue, is_css_id_start, lookup_byte, Dispatch, Dispatch::*,
 };
@@ -78,6 +80,9 @@ pub(crate) struct CssLexer<'src> {
 }

 impl<'src> Lexer<'src> for CssLexer<'src> {
+    const NEWLINE: Self::Kind = NEWLINE;
+
+    const WHITESPACE: Self::Kind = WHITESPACE;
     type Kind = CssSyntaxKind;
     type LexContext = CssLexContext;
     type ReLexContext = CssReLexContext;
@@ -102,18 +107,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
         self.diagnostics.push(diagnostic);
     }

-    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
-        LexerCheckpoint {
-            position: TextSize::from(self.position as u32),
-            current_start: self.current_start,
-            current_flags: self.current_flags,
-            current_kind: self.current_kind,
-            after_line_break: self.after_newline,
-            unicode_bom_length: self.unicode_bom_length,
-            diagnostics_pos: self.diagnostics.len() as u32,
-        }
-    }
-
     fn next_token(&mut self, context: Self::LexContext) -> Self::Kind {
         self.current_start = self.text_position();
         self.current_flags = TokenFlags::empty();
@@ -140,25 +133,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
         kind
     }

-    fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
-        let old_position = self.position;
-        self.position = u32::from(self.current_start) as usize;
-
-        let re_lexed_kind = match self.current_byte() {
-            Some(current) => self.consume_selector_token(current),
-            None => EOF,
-        };
-
-        if self.current() == re_lexed_kind {
-            // Didn't re-lex anything. Return existing token again
-            self.position = old_position;
-        } else {
-            self.current_kind = re_lexed_kind;
-        }
-
-        re_lexed_kind
-    }
-
     fn has_preceding_line_break(&self) -> bool {
         self.current_flags.has_preceding_line_break()
     }
@@ -197,20 +171,6 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
         self.current_flags
     }

-    /// Consume one newline or all whitespace until a non-whitespace or a newline is found.
-    ///
-    /// ## Safety
-    /// Must be called at a valid UT8 char boundary
-    fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
-        if self.consume_newline() {
-            self.after_newline = true;
-            NEWLINE
-        } else {
-            self.consume_whitespaces();
-            WHITESPACE
-        }
-    }
-
     #[inline]
     fn advance_char_unchecked(&mut self) {
         let c = self.current_char_unchecked();
@@ -314,7 +274,13 @@ impl<'src> CssLexer<'src> {
         let dispatched = lookup_byte(current);

         match dispatched {
-            WHS => self.consume_newline_or_whitespaces(),
+            WHS => {
+                let kind = self.consume_newline_or_whitespaces();
+                if kind == Self::NEWLINE {
+                    self.after_newline = true;
+                }
+                kind
+            }
             QOT => self.consume_string_literal(current),
             SLH => self.consume_slash(),
@@ -1268,6 +1234,42 @@ impl<'src> CssLexer<'src> {
             }
         }
     }
 }
+
+impl<'src> ReLexer<'src> for CssLexer<'src> {
+    fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
+        let old_position = self.position;
+        self.position = u32::from(self.current_start) as usize;
+
+        let re_lexed_kind = match self.current_byte() {
+            Some(current) => self.consume_selector_token(current),
+            None => EOF,
+        };
+
+        if self.current() == re_lexed_kind {
+            // Didn't re-lex anything. Return existing token again
+            self.position = old_position;
+        } else {
+            self.current_kind = re_lexed_kind;
+        }
+
+        re_lexed_kind
+    }
+}
+
+impl<'src> LexerWithCheckpoint<'src> for CssLexer<'src> {
+    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
+        LexerCheckpoint {
+            position: TextSize::from(self.position as u32),
+            current_start: self.current_start,
+            current_flags: self.current_flags,
+            current_kind: self.current_kind,
+            after_line_break: self.after_newline,
+            unicode_bom_length: self.unicode_bom_length,
+            diagnostics_pos: self.diagnostics.len() as u32,
+        }
+    }
+}
+
 #[derive(Copy, Clone, Debug)]
 enum LexStringState {
     /// String that contains an invalid escape sequence
diff --git a/crates/biome_grit_parser/src/lexer/mod.rs b/crates/biome_grit_parser/src/lexer/mod.rs
index 0cfec0c48ff6..690f9f9835d4 100644
--- a/crates/biome_grit_parser/src/lexer/mod.rs
+++ b/crates/biome_grit_parser/src/lexer/mod.rs
@@ -34,6 +34,8 @@ pub(crate) struct GritLexer<'src> {
 }

 impl<'src> Lexer<'src> for GritLexer<'src> {
+    const NEWLINE: Self::Kind = NEWLINE;
+    const WHITESPACE: Self::Kind = WHITESPACE;

     type Kind = GritSyntaxKind;
     type LexContext = ();
@@ -51,10 +53,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {
         self.current_start
     }

-    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
-        unimplemented!("Grit lexer doesn't support checkpoints");
-    }
-
     fn next_token(&mut self, _context: Self::LexContext) -> Self::Kind {
         self.current_start = self.text_position();

@@ -79,11 +77,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {

         kind
     }
-
-    fn re_lex(&mut self, _context: Self::ReLexContext) -> Self::Kind {
-        unimplemented!("Grit lexer doesn't support re-lexing");
-    }
-
     fn has_preceding_line_break(&self) -> bool {
         self.after_newline
     }
@@ -108,16 +101,6 @@ impl<'src> Lexer<'src> for GritLexer<'src> {
         self.diagnostics.push(diagnostic);
     }

-    fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
-        if self.consume_newline() {
-            self.after_newline = true;
-            NEWLINE
-        } else {
-            self.consume_whitespaces();
-            WHITESPACE
-        }
-    }
-
     #[inline]
     fn advance_char_unchecked(&mut self) {
         let c = self.current_char_unchecked();
diff --git a/crates/biome_grit_parser/src/parser/mod.rs b/crates/biome_grit_parser/src/parser/mod.rs
index 3bfe0b52a524..64a29527f97b 100644
--- a/crates/biome_grit_parser/src/parser/mod.rs
+++ b/crates/biome_grit_parser/src/parser/mod.rs
@@ -78,7 +78,7 @@ impl<'source> Parser for GritParser<'source> {
     }
 }

-pub(crate) fn parse_root(p: &mut GritParser) -> CompletedMarker {
+pub(crate) fn parse_root(p: &mut GritParser) {
     let m = p.start();

     p.eat(UNICODE_BOM);
@@ -90,7 +90,7 @@ pub(crate) fn parse_root(p: &mut GritParser) -> CompletedMarker {

     p.expect(EOF);

-    m.complete(p, GRIT_ROOT)
+    m.complete(p, GRIT_ROOT);
 }

 fn parse_version(p: &mut GritParser) -> ParsedSyntax {
diff --git a/crates/biome_html_factory/src/generated/node_factory.rs b/crates/biome_html_factory/src/generated/node_factory.rs
index ebb5167507cf..778699f90bd4 100644
--- a/crates/biome_html_factory/src/generated/node_factory.rs
+++ b/crates/biome_html_factory/src/generated/node_factory.rs
@@ -64,18 +64,64 @@ pub fn html_closing_element(
 pub fn html_directive(
     l_angle_token: SyntaxToken,
     excl_token: SyntaxToken,
-    content: HtmlString,
+    doctype_token: SyntaxToken,
     r_angle_token: SyntaxToken,
-) -> HtmlDirective {
-    HtmlDirective::unwrap_cast(SyntaxNode::new_detached(
-        HtmlSyntaxKind::HTML_DIRECTIVE,
-        [
-            Some(SyntaxElement::Token(l_angle_token)),
-            Some(SyntaxElement::Token(excl_token)),
-            Some(SyntaxElement::Node(content.into_syntax())),
-            Some(SyntaxElement::Token(r_angle_token)),
-        ],
-    ))
+) -> HtmlDirectiveBuilder {
+    HtmlDirectiveBuilder {
+        l_angle_token,
+        excl_token,
+        doctype_token,
+        r_angle_token,
+        html_token: None,
+        quirk_token: None,
+        public_id_token: None,
+        system_id_token: None,
+    }
+}
+pub struct HtmlDirectiveBuilder {
+    l_angle_token: SyntaxToken,
+    excl_token: SyntaxToken,
+    doctype_token: SyntaxToken,
+    r_angle_token: SyntaxToken,
+    html_token: Option<SyntaxToken>,
+    quirk_token: Option<SyntaxToken>,
+    public_id_token: Option<SyntaxToken>,
+    system_id_token: Option<SyntaxToken>,
+}
+impl HtmlDirectiveBuilder {
+    pub fn with_html_token(mut self, html_token: SyntaxToken) -> Self {
+        self.html_token = Some(html_token);
+        self
+    }
+    pub fn with_quirk_token(mut self, quirk_token: SyntaxToken) -> Self {
+        self.quirk_token = Some(quirk_token);
+        self
+    }
+    pub fn with_public_id_token(mut self, public_id_token: SyntaxToken) -> Self {
+        self.public_id_token = Some(public_id_token);
+        self
+    }
+    pub fn with_system_id_token(mut self, system_id_token: SyntaxToken) -> Self {
+        self.system_id_token = Some(system_id_token);
+        self
+    }
+    pub fn build(self) -> HtmlDirective {
+        HtmlDirective::unwrap_cast(SyntaxNode::new_detached(
+            HtmlSyntaxKind::HTML_DIRECTIVE,
+            [
+                Some(SyntaxElement::Token(self.l_angle_token)),
+                Some(SyntaxElement::Token(self.excl_token)),
+                Some(SyntaxElement::Token(self.doctype_token)),
+                self.html_token.map(|token| SyntaxElement::Token(token)),
+                self.quirk_token.map(|token| SyntaxElement::Token(token)),
+                self.public_id_token
+                    .map(|token| SyntaxElement::Token(token)),
+                self.system_id_token
+                    .map(|token| SyntaxElement::Token(token)),
+                Some(SyntaxElement::Token(self.r_angle_token)),
+            ],
+        ))
+    }
 }
 pub fn html_element(
     opening_element: HtmlOpeningElement,
@@ -113,36 +159,42 @@ pub fn html_opening_element(
         ],
     ))
 }
-pub fn html_root(
-    directive: HtmlDirective,
-    tags: HtmlElementList,
-    eof_token: SyntaxToken,
-) -> HtmlRootBuilder {
+pub fn html_root(eof_token: SyntaxToken) -> HtmlRootBuilder {
     HtmlRootBuilder {
-        directive,
-        tags,
         eof_token,
         bom_token: None,
+        directive: None,
+        html: None,
     }
 }
 pub struct HtmlRootBuilder {
-    directive: HtmlDirective,
-    tags: HtmlElementList,
     eof_token: SyntaxToken,
     bom_token: Option<SyntaxToken>,
+    directive: Option<HtmlDirective>,
+    html: Option<HtmlElement>,
 }
 impl HtmlRootBuilder {
     pub fn with_bom_token(mut self, bom_token: SyntaxToken) -> Self {
         self.bom_token = Some(bom_token);
         self
     }
+    pub fn with_directive(mut self, directive: HtmlDirective) -> Self {
+        self.directive = Some(directive);
+        self
+    }
+    pub fn with_html(mut self, html: HtmlElement) -> Self {
+        self.html = Some(html);
+        self
+    }
     pub fn build(self) -> HtmlRoot {
         HtmlRoot::unwrap_cast(SyntaxNode::new_detached(
             HtmlSyntaxKind::HTML_ROOT,
             [
                 self.bom_token.map(|token| SyntaxElement::Token(token)),
-                Some(SyntaxElement::Node(self.directive.into_syntax())),
-                Some(SyntaxElement::Node(self.tags.into_syntax())),
+                self.directive
+                    .map(|token| SyntaxElement::Node(token.into_syntax())),
+                self.html
+                    .map(|token| SyntaxElement::Node(token.into_syntax())),
                 Some(SyntaxElement::Token(self.eof_token)),
             ],
         ))
diff --git a/crates/biome_html_factory/src/generated/syntax_factory.rs b/crates/biome_html_factory/src/generated/syntax_factory.rs
index beab1db6a5ba..95b0b19a24c6 100644
--- a/crates/biome_html_factory/src/generated/syntax_factory.rs
+++ b/crates/biome_html_factory/src/generated/syntax_factory.rs
@@ -109,7 +109,7 @@ impl SyntaxFactory for HtmlSyntaxFactory {
             }
             HTML_DIRECTIVE => {
                 let mut elements = (&children).into_iter();
-                let mut slots: RawNodeSlots<4usize> = RawNodeSlots::default();
+                let mut slots: RawNodeSlots<8usize> = RawNodeSlots::default();
                 let mut current_element = elements.next();
                 if let Some(element) = &current_element {
                     if element.kind() == T ! [<] {
@@ -126,7 +126,35 @@ impl SyntaxFactory for HtmlSyntaxFactory {
                 }
                 slots.next_slot();
                 if let Some(element) = &current_element {
-                    if HtmlString::can_cast(element.kind()) {
+                    if element.kind() == T![doctype] {
+                        slots.mark_present();
+                        current_element = elements.next();
+                    }
+                }
+                slots.next_slot();
+                if let Some(element) = &current_element {
+                    if element.kind() == T![html] {
+                        slots.mark_present();
+                        current_element = elements.next();
+                    }
+                }
+                slots.next_slot();
+                if let Some(element) = &current_element {
+                    if element.kind() == HTML_LITERAL {
+                        slots.mark_present();
+                        current_element = elements.next();
+                    }
+                }
+                slots.next_slot();
+                if let Some(element) = &current_element {
+                    if element.kind() == HTML_STRING_LITERAL {
+                        slots.mark_present();
+                        current_element = elements.next();
+                    }
+                }
+                slots.next_slot();
+                if let Some(element) = &current_element {
+                    if element.kind() == HTML_STRING_LITERAL {
                         slots.mark_present();
                         current_element = elements.next();
                     }
@@ -258,7 +286,7 @@ impl SyntaxFactory for HtmlSyntaxFactory {
                 }
                 slots.next_slot();
                 if let Some(element) = &current_element {
-                    if HtmlElementList::can_cast(element.kind()) {
+                    if HtmlElement::can_cast(element.kind()) {
                         slots.mark_present();
                         current_element = elements.next();
                     }
diff --git a/crates/biome_html_parser/Cargo.toml b/crates/biome_html_parser/Cargo.toml
new file mode 100644
index 000000000000..7ae7af1b9e74
--- /dev/null
+++ b/crates/biome_html_parser/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+authors.workspace = true
+categories.workspace = true
+description = ""
+edition.workspace = true
+homepage.workspace = true
+keywords.workspace = true
+license.workspace = true
+name = "biome_html_parser"
+repository.workspace = true
+version = "0.0.0"
+
+[lints]
+workspace = true
+
+[dependencies]
+biome_console = { workspace = true }
+biome_diagnostics = { workspace = true }
+biome_html_factory = { workspace = true }
+biome_html_syntax = { workspace = true }
+biome_parser = { workspace = true }
+biome_rowan = { workspace = true }
+biome_unicode_table = { workspace = true }
+tracing = { workspace = true }
+
+
+[dev-dependencies]
+biome_test_utils = { path = "../biome_test_utils" }
+insta = { workspace = true }
+tests_macros = { path = "../tests_macros" }
diff --git a/crates/biome_html_parser/src/lexer/mod.rs b/crates/biome_html_parser/src/lexer/mod.rs
new file mode 100644
index 000000000000..aaf4afcfac81
--- /dev/null
+++ b/crates/biome_html_parser/src/lexer/mod.rs
@@ -0,0 +1,161 @@
+mod tests;
+
+use biome_html_syntax::HtmlSyntaxKind::{
+    EOF, ERROR_TOKEN, NEWLINE, TOMBSTONE, UNICODE_BOM, WHITESPACE,
+};
+use biome_html_syntax::{HtmlSyntaxKind, TextLen, TextSize, T};
+use biome_parser::diagnostic::ParseDiagnostic;
+use biome_parser::lexer::{Lexer, LexerCheckpoint, TokenFlags};
+
+pub(crate) struct HtmlLexer<'src> {
+    /// Source text
+    source: &'src str,
+
+    /// The start byte position in the source text of the next token.
+    position: usize,
+
+    current_kind: HtmlSyntaxKind,
+
+    current_start: TextSize,
+
+    diagnostics: Vec<ParseDiagnostic>,
+
+    current_flags: TokenFlags,
+
+    preceding_line_break: bool,
+    after_newline: bool,
+}
+
+impl<'src> HtmlLexer<'src> {
+    pub fn from_str(string: &'src str) -> Self {
+        Self {
+            source: string,
+            position: 0,
+            diagnostics: vec![],
+            current_start: TextSize::from(0),
+            current_kind: TOMBSTONE,
+            preceding_line_break: false,
+            after_newline: false,
+            current_flags: TokenFlags::empty(),
+        }
+    }
+}
+
+impl<'src> HtmlLexer<'src> {
+    fn consume_token(&mut self, current: u8) -> HtmlSyntaxKind {
+        match current {
+            b'\n' | b'\r' | b'\t' | b' ' => self.consume_newline_or_whitespaces(),
+            b'<' => self.consume_byte(T![<]),
+            b'>' => self.consume_byte(T![>]),
+            b'/' => self.consume_byte(T![/]),
+            b'!' => self.consume_byte(T![!]),
+            _ if self.position == 0 && self.consume_potential_bom(UNICODE_BOM).is_some() => {
+                UNICODE_BOM
+            }
+            _ => self.consume_unexpected_character(),
+        }
+    }
+
+    /// Bumps the current byte and creates a lexed token of the passed in kind.
+    #[inline]
+    fn consume_byte(&mut self, tok: HtmlSyntaxKind) -> HtmlSyntaxKind {
+        self.advance(1);
+        tok
+    }
+
+    fn consume_unexpected_character(&mut self) -> HtmlSyntaxKind {
+        self.assert_at_char_boundary();
+
+        let char = self.current_char_unchecked();
+        let err = ParseDiagnostic::new(
+            format!("Unexpected character `{}`", char),
+            self.text_position()..self.text_position() + char.text_len(),
+        );
+        self.diagnostics.push(err);
+        self.advance(char.len_utf8());
+
+        ERROR_TOKEN
+    }
+
+    /// Asserts that the lexer is at a UTF8 char boundary
+    #[inline]
+    fn assert_at_char_boundary(&self) {
+        debug_assert!(self.source.is_char_boundary(self.position));
+    }
+}
+
+impl<'src> Lexer<'src> for HtmlLexer<'src> {
+    const NEWLINE: Self::Kind = NEWLINE;
+    const WHITESPACE: Self::Kind = WHITESPACE;
+    type Kind = HtmlSyntaxKind;
+    type LexContext = ();
+    type ReLexContext = ();
+
+    fn source(&self) -> &'src str {
+        self.source
+    }
+
+    fn current(&self) -> Self::Kind {
+        self.current_kind
+    }
+
+    fn current_start(&self) -> TextSize {
+        self.current_start
+    }
+
+    fn next_token(&mut self, _context: Self::LexContext) -> Self::Kind {
+        self.current_start = TextSize::from(self.position as u32);
+        self.current_flags = TokenFlags::empty();
+
+        let kind = if self.is_eof() {
+            EOF
+        } else {
+            match self.current_byte() {
+                Some(current) => self.consume_token(current),
+                None => EOF,
+            }
+        };
+
+        self.current_flags
+            .set(TokenFlags::PRECEDING_LINE_BREAK, self.after_newline);
+        self.current_kind = kind;
+
+        if !kind.is_trivia() {
+            self.after_newline = false;
+        }
+
+        kind
+    }
+    fn has_preceding_line_break(&self) -> bool {
+        self.preceding_line_break
+    }
+
+    fn has_unicode_escape(&self) -> bool {
+        self.current_flags.has_unicode_escape()
+    }
+
+    fn rewind(&mut self, _checkpoint: LexerCheckpoint<Self::Kind>) {
+        unreachable!("no need")
+    }
+
+    fn finish(self) -> Vec<ParseDiagnostic> {
+        self.diagnostics
+    }
+
+    fn position(&self) -> usize {
+        self.position
+    }
+
+    fn push_diagnostic(&mut self, diagnostic: ParseDiagnostic) {
+        self.diagnostics.push(diagnostic);
+    }
+
+    fn advance_char_unchecked(&mut self) {
+        let c = self.current_char_unchecked();
+        self.position += c.len_utf8();
+    }
+
+    fn advance(&mut self, n: usize) {
+        self.position += n;
+    }
+}
diff --git a/crates/biome_html_parser/src/lexer/tests.rs b/crates/biome_html_parser/src/lexer/tests.rs
new file mode 100644
index 000000000000..2dadd18ceea1
--- /dev/null
+++ b/crates/biome_html_parser/src/lexer/tests.rs
@@ -0,0 +1,88 @@
+#![cfg(test)]
+#![allow(unused_mut, unused_variables, unused_assignments)]
+
+use super::{HtmlLexer, TextSize};
+use biome_html_syntax::HtmlSyntaxKind::{self, *};
+use biome_parser::lexer::Lexer;
+use biome_rowan::TextRange;
+
+pub struct Token {
+    kind: HtmlSyntaxKind,
+    range: TextRange,
+}
+
+impl Iterator for HtmlLexer<'_> {
+    type Item = Token;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let kind = self.next_token(());
+        if kind == EOF {
+            None
+        } else {
+            Some(Token {
+                kind,
+                range: self.current_range(),
+            })
+        }
+    }
+}
+
+// Assert the result of lexing a piece of source code,
+// and make sure the tokens yielded are fully lossless and the source can be reconstructed from only the tokens
+macro_rules! assert_lex {
+    ($src:expr, $($kind:ident:$len:expr $(,)?)*) => {{
+        let mut lexer = HtmlLexer::from_str($src);
+        let mut idx = 0;
+        let mut tok_idx = TextSize::default();
+
+        let mut new_str = String::with_capacity($src.len());
+        let tokens: Vec<_> = lexer.collect();
+
+        $(
+            assert_eq!(
+                tokens[idx].kind,
+                HtmlSyntaxKind::$kind,
+                "expected token kind {}, but found {:?}",
+                stringify!($kind),
+                tokens[idx].kind,
+            );
+
+            assert_eq!(
+                tokens[idx].range.len(),
+                TextSize::from($len),
+                "expected token length of {}, but found {:?} for token {:?}",
+                $len,
+                tokens[idx].range.len(),
+                tokens[idx].kind,
+            );
+
+            new_str.push_str(&$src[tokens[idx].range]);
+            tok_idx += tokens[idx].range.len();
+
+            idx += 1;
+        )*
+
+        if idx < tokens.len() {
+            panic!(
+                "expected {} tokens but lexer returned {}, first unexpected token is '{:?}'",
+                idx,
+                tokens.len(),
+                tokens[idx].kind
+            );
+        } else {
+            assert_eq!(idx, tokens.len());
+        }
+
+        assert_eq!($src, new_str, "Failed to reconstruct input");
+    }};
+}
+
+// TODO: fix
+#[test]
+#[ignore = "currently not handled"]
+fn doctype() {
+    assert_lex! {
+        "doctype",
+        ERROR_TOKEN:1,
+    }
+}
diff --git a/crates/biome_html_parser/src/lib.rs b/crates/biome_html_parser/src/lib.rs
new file mode 100644
index 000000000000..5e1b8522186a
--- /dev/null
+++ b/crates/biome_html_parser/src/lib.rs
@@ -0,0 +1,93 @@
+mod lexer;
+mod parser;
+mod syntax;
+mod token_source;
+
+use crate::parser::{HtmlLosslessTreeSink, HtmlParser};
+use crate::syntax::parse_root;
+use biome_html_syntax::{HtmlRoot, HtmlSyntaxNode};
+use biome_parser::diagnostic::ParseDiagnostic;
+use biome_rowan::{AstNode, NodeCache};
+
+/// Parses the provided string as an HTML program using the provided node cache.
+pub fn parse_html_with_cache(source: &str, cache: &mut NodeCache) -> HtmlParse {
+    tracing::debug_span!("Parsing phase").in_scope(move || {
+        let mut parser = HtmlParser::new(source);
+
+        parse_root(&mut parser);
+
+        let (events, diagnostics, trivia) = parser.finish();
+
+        let mut tree_sink = HtmlLosslessTreeSink::with_cache(source, &trivia, cache);
+        biome_parser::event::process(&mut tree_sink, events, diagnostics);
+        let (green, diagnostics) = tree_sink.finish();
+
+        HtmlParse::new(green, diagnostics)
+    })
+}
+pub fn parse_html(source: &str) -> HtmlParse {
+    let mut cache = NodeCache::default();
+    parse_html_with_cache(source, &mut cache)
+}
+
+/// A utility struct for managing the result of a parser job
+#[derive(Debug)]
+pub struct HtmlParse {
+    root: HtmlSyntaxNode,
+    diagnostics: Vec<ParseDiagnostic>,
+}
+
+impl HtmlParse {
+    pub fn new(root: HtmlSyntaxNode, diagnostics: Vec<ParseDiagnostic>) -> Self {
+        Self { root, diagnostics }
+    }
+
+    /// The syntax node represented by this Parse result
+    ///
+    /// ```
+    /// # use biome_html_parser::parse_html;
+    /// # use biome_html_syntax::HtmlSyntaxKind;
+    /// # use biome_rowan::{AstNode, AstNodeList, SyntaxError};
+    ///
+    /// # fn main() -> Result<(), SyntaxError> {
+    /// use biome_html_syntax::HtmlSyntaxKind;
+    /// // let parse = parse_html(r#""#);
+    ///
+    /// // Get the root value
+    /// // let root_value = parse.tree().html()?;
+    ///
+    /// // assert_eq!(root_value.syntax().kind(), HtmlSyntaxKind::HTML_ELEMENT);
+    ///
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn syntax(&self) -> HtmlSyntaxNode {
+        self.root.clone()
+    }
+
+    /// Get the diagnostics which occurred when parsing
+    pub fn diagnostics(&self) -> &[ParseDiagnostic] {
+        &self.diagnostics
+    }
+
+    /// Get the diagnostics which occurred when parsing
+    pub fn into_diagnostics(self) -> Vec<ParseDiagnostic> {
+        self.diagnostics
+    }
+
+    /// Returns [true] if the parser encountered some errors during parsing.
+    pub fn has_errors(&self) -> bool {
+        self.diagnostics
+            .iter()
+            .any(|diagnostic| diagnostic.is_error())
+    }
+
+    /// Convert this parse result into a typed AST node.
+    ///
+    /// # Panics
+    ///
+    /// It panics if the node represented by this parse result mismatches.
+    pub fn tree(&self) -> HtmlRoot {
+        HtmlRoot::unwrap_cast(self.syntax())
+    }
+}
diff --git a/crates/biome_html_parser/src/parser.rs b/crates/biome_html_parser/src/parser.rs
new file mode 100644
index 000000000000..0c535d04d2d5
--- /dev/null
+++ b/crates/biome_html_parser/src/parser.rs
@@ -0,0 +1,61 @@
+use crate::token_source::HtmlTokenSource;
+use biome_html_factory::HtmlSyntaxFactory;
+use biome_html_syntax::{HtmlLanguage, HtmlSyntaxKind};
+use biome_parser::diagnostic::{merge_diagnostics, ParseDiagnostic};
+use biome_parser::event::Event;
+use biome_parser::prelude::*;
+use biome_parser::tree_sink::LosslessTreeSink;
+use biome_parser::{Parser, ParserContext};
+
+pub(crate) type HtmlLosslessTreeSink<'source> =
+    LosslessTreeSink<'source, HtmlLanguage, HtmlSyntaxFactory>;
+
+pub(crate) struct HtmlParser<'source> {
+    context: ParserContext<HtmlSyntaxKind>,
+    source: HtmlTokenSource<'source>,
+}
+
+impl<'source> HtmlParser<'source> {
+    pub fn new(source: &'source str) -> Self {
+        Self {
+            context: ParserContext::default(),
+            source: HtmlTokenSource::from_str(source),
+        }
+    }
+
+    pub fn finish(
+        self,
+    ) -> (
+        Vec<Event<HtmlSyntaxKind>>,
+        Vec<ParseDiagnostic>,
+        Vec<Trivia>,
+    ) {
+        let (trivia, lexer_diagnostics) = self.source.finish();
+        let (events, parse_diagnostics) = self.context.finish();
+
+        let diagnostics = merge_diagnostics(lexer_diagnostics, parse_diagnostics);
+
+        (events, diagnostics, trivia)
+    }
+}
+
+impl<'src> Parser for HtmlParser<'src> {
+    type Kind = HtmlSyntaxKind;
+    type Source = HtmlTokenSource<'src>;
+
+    fn context(&self) -> &ParserContext<Self::Kind> {
+        &self.context
+    }
+
+    fn context_mut(&mut self) -> &mut ParserContext<Self::Kind> {
+        &mut self.context
+    }
+
+    fn source(&self) -> &Self::Source {
+        &self.source
+    }
+
+    fn source_mut(&mut self) -> &mut Self::Source {
+        &mut self.source
+    }
+}
diff --git a/crates/biome_html_parser/src/syntax/mod.rs b/crates/biome_html_parser/src/syntax/mod.rs
new file mode 100644
index 000000000000..759a5c085b9f
--- /dev/null
+++ b/crates/biome_html_parser/src/syntax/mod.rs
@@ -0,0 +1,34 @@
+use crate::parser::HtmlParser;
+use biome_html_syntax::HtmlSyntaxKind::{HTML_DIRECTIVE, HTML_ROOT, UNICODE_BOM};
+use biome_html_syntax::T;
+use biome_parser::prelude::ParsedSyntax::Absent;
+use biome_parser::prelude::*;
+use biome_parser::Parser;
+
+pub(crate) fn parse_root(p: &mut HtmlParser) {
+    let m = p.start();
+
+    p.eat(UNICODE_BOM);
+
+    parse_doc_type(p).ok();
+
+    m.complete(p, HTML_ROOT);
+}
+
+fn parse_doc_type(p: &mut HtmlParser) -> ParsedSyntax {
+    if !p.at(T![<]) {
+        return Absent;
+    }
+
+    let m = p.start();
+    p.eat(T![<]);
+    p.bump(T![!]);
+
+    if p.at(T![doctype]) {
+        p.eat(T![doctype]);
+    }
+
+    p.eat(T![>]);
+
+    ParsedSyntax::Present(m.complete(p, HTML_DIRECTIVE))
+}
diff --git a/crates/biome_html_parser/src/token_source.rs b/crates/biome_html_parser/src/token_source.rs
new file mode 100644
index 000000000000..c3deace10579
--- /dev/null
+++ b/crates/biome_html_parser/src/token_source.rs
@@ -0,0 +1,129 @@
+use crate::lexer::HtmlLexer;
+use biome_html_syntax::HtmlSyntaxKind::{EOF, TOMBSTONE};
+use biome_html_syntax::{HtmlSyntaxKind, TextRange};
+use biome_parser::diagnostic::ParseDiagnostic;
+use biome_parser::lexer::{LexContext, Lexer};
+use biome_parser::token_source::{TokenSource, Trivia};
+use biome_rowan::TriviaPieceKind;
+
+pub(crate) struct HtmlTokenSource<'source> {
+    lexer: HtmlLexer<'source>,
+    trivia: Vec<Trivia>,
+    current: HtmlSyntaxKind,
+    current_range: TextRange,
+    preceding_line_break: bool,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
+pub enum HtmlLexContext {
+    /// Default context: no particular rules are applied to the lexer logic.
+    #[default]
+    Regular,
+}
+
+impl HtmlLexContext {
+    const fn is_regular(&self) -> bool {
+        matches!(self, Self::Regular)
+    }
+}
+
+impl LexContext for HtmlLexContext {
+    fn is_regular(&self) -> bool {
+        self.is_regular()
+    }
+}
+
+impl<'source> HtmlTokenSource<'source> {
+    pub fn from_str(source: &'source str) -> Self {
+        let lexer = HtmlLexer::from_str(source);
+
+        let mut source = Self {
+            lexer,
+            trivia: Vec::new(),
+            current: TOMBSTONE,
+            current_range: TextRange::default(),
+            preceding_line_break: false,
+        };
+
+        source.next_non_trivia_token(true);
+        source
+    }
+
+    fn next_non_trivia_token(&mut self, first_token: bool) {
+        let mut trailing = !first_token;
+        self.preceding_line_break = false;
+
+        loop {
+            let token = self.lexer.next_token(());
+            if token == EOF {
+                break;
+            } else {
+                let trivia_kind = TriviaPieceKind::try_from(token);
+
+                match trivia_kind {
+                    Err(_) => {
+                        self.current = token;
+                        self.current_range = self.lexer.current_range();
+                        // Not trivia
+                        break;
+                    }
+
+                    Ok(trivia_kind) => {
+                        if trivia_kind.is_newline() {
+                            trailing = false;
+                            self.preceding_line_break = true;
+                        }
+
+                        self.trivia.push(Trivia::new(
+                            trivia_kind,
+                            self.lexer.current_range(),
+                            trailing,
+                        ));
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl<'src> TokenSource for HtmlTokenSource<'src> {
+    type Kind = HtmlSyntaxKind;
+
+    fn current(&self) -> Self::Kind {
+        self.current
+    }
+
+    fn current_range(&self) -> TextRange {
+        self.current_range
+    }
+
+    fn text(&self) -> &str {
+        self.lexer.source()
+    }
+
+    fn has_preceding_line_break(&self) -> bool {
+        self.lexer.has_preceding_line_break()
+    }
+
+    fn bump(&mut self) {
+        if self.current != EOF {
+            self.next_non_trivia_token(false)
+        }
+    }
+
+    fn skip_as_trivia(&mut self) {
+        if self.current() != EOF {
+            self.trivia.push(Trivia::new(
+                TriviaPieceKind::Skipped,
+                self.current_range(),
+                false,
+            ));
+
+            self.next_non_trivia_token(false)
+        }
+    }
+
+    fn finish(self) -> (Vec<Trivia>, Vec<ParseDiagnostic>) {
+        (self.trivia, self.lexer.finish())
+    }
+}
diff --git a/crates/biome_html_parser/tests/html_specs/ok/ok.html b/crates/biome_html_parser/tests/html_specs/ok/ok.html
new file mode 100644
index 000000000000..8b137891791f
--- /dev/null
+++ b/crates/biome_html_parser/tests/html_specs/ok/ok.html
@@ -0,0 +1 @@
+
diff --git a/crates/biome_html_parser/tests/html_specs/ok/ok.html.snap b/crates/biome_html_parser/tests/html_specs/ok/ok.html.snap
new file mode 100644
index 000000000000..2d7fc5b1a66c
--- /dev/null
+++ b/crates/biome_html_parser/tests/html_specs/ok/ok.html.snap
@@ -0,0 +1,33 @@
+---
+source: crates/biome_html_parser/tests/spec_test.rs
+expression: snapshot
+---
+## Input
+
+```html
+
+```
+
+
+## AST
+
+```
+HtmlRoot {
+    bom_token: missing (optional),
+    directive: missing (optional),
+    html: missing (optional),
+    eof_token: EOF@0..1 "" [Newline("\n")] [],
+}
+```
+
+## CST
+
+```
+0: HTML_ROOT@0..1
+  0: (empty)
+  1: (empty)
+  2: (empty)
+  3: EOF@0..1 "" [Newline("\n")] []
+
+```
diff --git a/crates/biome_html_parser/tests/spec_test.rs b/crates/biome_html_parser/tests/spec_test.rs
new file mode 100644
index 000000000000..e56948f05628
--- /dev/null
+++ b/crates/biome_html_parser/tests/spec_test.rs
@@ -0,0 +1,128 @@
+use biome_console::fmt::{Formatter, Termcolor};
+use biome_console::markup;
+use biome_diagnostics::{termcolor, DiagnosticExt, PrintDiagnostic};
+use biome_html_parser::parse_html;
+use biome_rowan::SyntaxKind;
+use biome_test_utils::has_bogus_nodes_or_empty_slots;
+use std::fmt::Write;
+use std::fs;
+use std::path::Path;
+
+#[derive(Copy, Clone)]
+pub enum ExpectedOutcome {
+    Pass,
+    Fail,
+    Undefined,
+}
+
+pub fn run(test_case: &str, _snapshot_name: &str, test_directory: &str, outcome_str: &str) {
+    let outcome = match outcome_str {
+        "ok" => ExpectedOutcome::Pass,
+        "error" => ExpectedOutcome::Fail,
+        "undefined" => ExpectedOutcome::Undefined,
+        _ => panic!("Invalid expected outcome {outcome_str}"),
+    };
+
+    let test_case_path = Path::new(test_case);
+
+    let file_name = test_case_path
+        .file_name()
+        .expect("Expected test to have a file name")
+        .to_str()
+        .expect("File name to be valid UTF8");
+
+    let content = fs::read_to_string(test_case_path)
+        .expect("Expected test path to be a readable file in UTF8 encoding");
+
+    let parsed = parse_html(&content);
+    let formatted_ast = format!("{:#?}", parsed.tree());
+
+    let mut snapshot = String::new();
+    writeln!(snapshot, "\n## Input\n\n```html\n{content}\n```\n\n").unwrap();
+
+    writeln!(
+        snapshot,
+        r#"## AST
+
+```
+{formatted_ast}
+```
+
+## CST
+
+```
+{:#?}
+```
+"#,
+        parsed.syntax()
+    )
+    .unwrap();
+
+    let diagnostics = parsed.diagnostics();
+    if !diagnostics.is_empty() {
+        let mut diagnostics_buffer = termcolor::Buffer::no_color();
+
+        let termcolor = &mut Termcolor(&mut diagnostics_buffer);
+        let mut formatter = Formatter::new(termcolor);
+
+        for diagnostic in diagnostics {
+            let error = diagnostic
+                .clone()
+                .with_file_path(file_name)
+                .with_file_source_code(&content);
+
+            formatter
+                .write_markup(markup! {
+                    {PrintDiagnostic::verbose(&error)}
+                })
+                .expect("failed to emit diagnostic");
+        }
+
+        let formatted_diagnostics =
+            std::str::from_utf8(diagnostics_buffer.as_slice()).expect("non utf8 in error buffer");
+
+        if matches!(outcome, ExpectedOutcome::Pass) {
+            panic!("Expected no errors to be present in a test case that is expected to pass but the following diagnostics are present:\n{formatted_diagnostics}")
+        }
+
+        writeln!(snapshot, "## Diagnostics\n\n```").unwrap();
+        snapshot.write_str(formatted_diagnostics).unwrap();
+
+        writeln!(snapshot, "```\n").unwrap();
+    }
+
+    match outcome {
+        ExpectedOutcome::Pass => {
+            let missing_required = formatted_ast.contains("missing (required)");
+            if missing_required
+                || parsed
+                    .syntax()
+                    .descendants()
+                    .any(|node| node.kind().is_bogus())
+            {
+                panic!("Parsed tree of an 'OK' test case should not contain any missing required children or bogus nodes: \n {formatted_ast:#?} \n\n {}", formatted_ast);
+            }
+
+            let syntax = parsed.syntax();
+            if has_bogus_nodes_or_empty_slots(&syntax) {
+                panic!(
+                    "modified tree has bogus nodes or empty slots:\n{syntax:#?} \n\n {}",
+                    syntax
+                )
+            }
+        }
+        ExpectedOutcome::Fail => {
+            if parsed.diagnostics().is_empty() {
+                panic!("Failing test must have diagnostics");
+            }
+        }
+        _ => {}
+    }
+
+    insta::with_settings!({
+        prepend_module_to_snapshot => false,
+        snapshot_path => &test_directory,
+    }, {
+        insta::assert_snapshot!(file_name, snapshot);
+    });
+}
diff --git a/crates/biome_html_parser/tests/spec_tests.rs b/crates/biome_html_parser/tests/spec_tests.rs
new file mode 100644
index 000000000000..f835579ffd31
--- /dev/null
+++ b/crates/biome_html_parser/tests/spec_tests.rs
@@ -0,0 +1,8 @@
+#![allow(non_snake_case)]
+
+mod spec_test;
+
+mod ok {
+    tests_macros::gen_tests! {"tests/html_specs/ok/**/*.html", crate::spec_test::run, "ok"}
+    tests_macros::gen_tests! {"tests/html_specs/error/**/*.html", crate::spec_test::run, "error"}
+}
{"tests/html_specs/error/**/*.html", crate::spec_test::run, "error"} +} diff --git a/crates/biome_html_syntax/src/generated/kind.rs b/crates/biome_html_syntax/src/generated/kind.rs index 3e25d00222e1..07a16de7e642 100644 --- a/crates/biome_html_syntax/src/generated/kind.rs +++ b/crates/biome_html_syntax/src/generated/kind.rs @@ -21,7 +21,10 @@ pub enum HtmlSyntaxKind { NULL_KW, TRUE_KW, FALSE_KW, + DOCTYPE_KW, + HTML_KW, HTML_STRING_LITERAL, + HTML_LITERAL, ERROR_TOKEN, NEWLINE, WHITESPACE, @@ -55,7 +58,7 @@ impl HtmlSyntaxKind { } pub const fn is_literal(self) -> bool { match self { - HTML_STRING_LITERAL => true, + HTML_STRING_LITERAL | HTML_LITERAL => true, _ => false, } } @@ -70,6 +73,8 @@ impl HtmlSyntaxKind { "null" => NULL_KW, "true" => TRUE_KW, "false" => FALSE_KW, + "doctype" => DOCTYPE_KW, + "html" => HTML_KW, _ => return None, }; Some(kw) @@ -84,6 +89,8 @@ impl HtmlSyntaxKind { NULL_KW => "null", TRUE_KW => "true", FALSE_KW => "false", + DOCTYPE_KW => "doctype", + HTML_KW => "html", HTML_STRING_LITERAL => "string literal", _ => return None, }; @@ -92,4 +99,4 @@ impl HtmlSyntaxKind { } #[doc = r" Utility macro for creating a SyntaxKind through simple macro syntax"] #[macro_export] -macro_rules ! T { [<] => { $ crate :: HtmlSyntaxKind :: L_ANGLE } ; [>] => { $ crate :: HtmlSyntaxKind :: R_ANGLE } ; [/] => { $ crate :: HtmlSyntaxKind :: SLASH } ; [=] => { $ crate :: HtmlSyntaxKind :: EQ } ; [!] => { $ crate :: HtmlSyntaxKind :: BANG } ; [null] => { $ crate :: HtmlSyntaxKind :: NULL_KW } ; [true] => { $ crate :: HtmlSyntaxKind :: TRUE_KW } ; [false] => { $ crate :: HtmlSyntaxKind :: FALSE_KW } ; [ident] => { $ crate :: HtmlSyntaxKind :: IDENT } ; [EOF] => { $ crate :: HtmlSyntaxKind :: EOF } ; [UNICODE_BOM] => { $ crate :: HtmlSyntaxKind :: UNICODE_BOM } ; [#] => { $ crate :: HtmlSyntaxKind :: HASH } ; } +macro_rules ! T { [<] => { $ crate :: HtmlSyntaxKind :: L_ANGLE } ; [>] => { $ crate :: HtmlSyntaxKind :: R_ANGLE } ; [/] => { $ crate :: HtmlSyntaxKind :: SLASH } ; [=] => { $ crate :: HtmlSyntaxKind :: EQ } ; [!] 
diff --git a/crates/biome_html_syntax/src/generated/nodes.rs b/crates/biome_html_syntax/src/generated/nodes.rs
index 430859542684..1fa5742d55fd 100644
--- a/crates/biome_html_syntax/src/generated/nodes.rs
+++ b/crates/biome_html_syntax/src/generated/nodes.rs
@@ -175,7 +175,11 @@ impl HtmlDirective {
         HtmlDirectiveFields {
             l_angle_token: self.l_angle_token(),
             excl_token: self.excl_token(),
-            content: self.content(),
+            doctype_token: self.doctype_token(),
+            html_token: self.html_token(),
+            quirk_token: self.quirk_token(),
+            public_id_token: self.public_id_token(),
+            system_id_token: self.system_id_token(),
             r_angle_token: self.r_angle_token(),
         }
     }
@@ -185,11 +189,23 @@ impl HtmlDirective {
     pub fn excl_token(&self) -> SyntaxResult<SyntaxToken> {
         support::required_token(&self.syntax, 1usize)
     }
-    pub fn content(&self) -> SyntaxResult<HtmlString> {
-        support::required_node(&self.syntax, 2usize)
+    pub fn doctype_token(&self) -> SyntaxResult<SyntaxToken> {
+        support::required_token(&self.syntax, 2usize)
+    }
+    pub fn html_token(&self) -> Option<SyntaxToken> {
+        support::token(&self.syntax, 3usize)
+    }
+    pub fn quirk_token(&self) -> Option<SyntaxToken> {
+        support::token(&self.syntax, 4usize)
+    }
+    pub fn public_id_token(&self) -> Option<SyntaxToken> {
+        support::token(&self.syntax, 5usize)
+    }
+    pub fn system_id_token(&self) -> Option<SyntaxToken> {
+        support::token(&self.syntax, 6usize)
     }
     pub fn r_angle_token(&self) -> SyntaxResult<SyntaxToken> {
-        support::required_token(&self.syntax, 3usize)
+        support::required_token(&self.syntax, 7usize)
     }
 }
 #[cfg(feature = "serde")]
@@ -205,7 +221,11 @@ impl Serialize for HtmlDirective {
 pub struct HtmlDirectiveFields {
     pub l_angle_token: SyntaxResult<SyntaxToken>,
     pub excl_token: SyntaxResult<SyntaxToken>,
-    pub content: SyntaxResult<HtmlString>,
+    pub doctype_token: SyntaxResult<SyntaxToken>,
+    pub html_token: Option<SyntaxToken>,
+    pub quirk_token: Option<SyntaxToken>,
+    pub public_id_token: Option<SyntaxToken>,
+    pub system_id_token: Option<SyntaxToken>,
     pub r_angle_token: SyntaxResult<SyntaxToken>,
 }
 #[derive(Clone, PartialEq, Eq, Hash)]
@@ -359,18 +379,18 @@ impl HtmlRoot {
         HtmlRootFields {
             bom_token: self.bom_token(),
             directive: self.directive(),
-            tags: self.tags(),
+            html: self.html(),
             eof_token: self.eof_token(),
         }
     }
     pub fn bom_token(&self) -> Option<SyntaxToken> {
         support::token(&self.syntax, 0usize)
     }
-    pub fn directive(&self) -> SyntaxResult<HtmlDirective> {
-        support::required_node(&self.syntax, 1usize)
+    pub fn directive(&self) -> Option<HtmlDirective> {
+        support::node(&self.syntax, 1usize)
     }
-    pub fn tags(&self) -> HtmlElementList {
-        support::list(&self.syntax, 2usize)
+    pub fn html(&self) -> Option<HtmlElement> {
+        support::node(&self.syntax, 2usize)
     }
     pub fn eof_token(&self) -> SyntaxResult<SyntaxToken> {
         support::required_token(&self.syntax, 3usize)
@@ -388,8 +408,8 @@ impl Serialize for HtmlRoot {
 #[cfg_attr(feature = "serde", derive(Serialize))]
 pub struct HtmlRootFields {
     pub bom_token: Option<SyntaxToken>,
-    pub directive: SyntaxResult<HtmlDirective>,
-    pub tags: HtmlElementList,
+    pub directive: Option<HtmlDirective>,
+    pub html: Option<HtmlElement>,
     pub eof_token: SyntaxResult<SyntaxToken>,
 }
 #[derive(Clone, PartialEq, Eq, Hash)]
@@ -664,7 +684,26 @@ impl std::fmt::Debug for HtmlDirective {
                 &support::DebugSyntaxResult(self.l_angle_token()),
             )
             .field("excl_token", &support::DebugSyntaxResult(self.excl_token()))
&support::DebugSyntaxResult(self.excl_token())) - .field("content", &support::DebugSyntaxResult(self.content())) + .field( + "doctype_token", + &support::DebugSyntaxResult(self.doctype_token()), + ) + .field( + "html_token", + &support::DebugOptionalElement(self.html_token()), + ) + .field( + "quirk_token", + &support::DebugOptionalElement(self.quirk_token()), + ) + .field( + "public_id_token", + &support::DebugOptionalElement(self.public_id_token()), + ) + .field( + "system_id_token", + &support::DebugOptionalElement(self.system_id_token()), + ) .field( "r_angle_token", &support::DebugSyntaxResult(self.r_angle_token()), @@ -844,8 +883,11 @@ impl std::fmt::Debug for HtmlRoot { "bom_token", &support::DebugOptionalElement(self.bom_token()), ) - .field("directive", &support::DebugSyntaxResult(self.directive())) - .field("tags", &self.tags()) + .field( + "directive", + &support::DebugOptionalElement(self.directive()), + ) + .field("html", &support::DebugOptionalElement(self.html())) .field("eof_token", &support::DebugSyntaxResult(self.eof_token())) .finish() } diff --git a/crates/biome_html_syntax/src/generated/nodes_mut.rs b/crates/biome_html_syntax/src/generated/nodes_mut.rs index 21dfbdf5b624..29bb21edcabc 100644 --- a/crates/biome_html_syntax/src/generated/nodes_mut.rs +++ b/crates/biome_html_syntax/src/generated/nodes_mut.rs @@ -70,16 +70,40 @@ impl HtmlDirective { .splice_slots(1usize..=1usize, once(Some(element.into()))), ) } - pub fn with_content(self, element: HtmlString) -> Self { + pub fn with_doctype_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + .splice_slots(2usize..=2usize, once(Some(element.into()))), + ) + } + pub fn with_html_token(self, element: Option) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(3usize..=3usize, once(element.map(|element| element.into()))), + ) + } + pub fn with_quirk_token(self, element: Option) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(4usize..=4usize, once(element.map(|element| element.into()))), + ) + } + pub fn with_public_id_token(self, element: Option) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(5usize..=5usize, once(element.map(|element| element.into()))), + ) + } + pub fn with_system_id_token(self, element: Option) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(6usize..=6usize, once(element.map(|element| element.into()))), ) } pub fn with_r_angle_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(3usize..=3usize, once(Some(element.into()))), + .splice_slots(7usize..=7usize, once(Some(element.into()))), ) } } @@ -144,17 +168,17 @@ impl HtmlRoot { .splice_slots(0usize..=0usize, once(element.map(|element| element.into()))), ) } - pub fn with_directive(self, element: HtmlDirective) -> Self { - Self::unwrap_cast( - self.syntax - .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), - ) + pub fn with_directive(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 1usize..=1usize, + once(element.map(|element| element.into_syntax().into())), + )) } - pub fn with_tags(self, element: HtmlElementList) -> Self { - Self::unwrap_cast( - self.syntax - .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), - ) + pub fn with_html(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 2usize..=2usize, + once(element.map(|element| element.into_syntax().into())), + )) 
     }
     pub fn with_eof_token(self, element: SyntaxToken) -> Self {
         Self::unwrap_cast(
diff --git a/crates/biome_js_parser/src/lexer/mod.rs b/crates/biome_js_parser/src/lexer/mod.rs
index 935eb69bab54..b4f52ebb4799 100644
--- a/crates/biome_js_parser/src/lexer/mod.rs
+++ b/crates/biome_js_parser/src/lexer/mod.rs
@@ -22,7 +22,9 @@ mod tests;
 use biome_js_syntax::JsSyntaxKind::*;
 pub use biome_js_syntax::*;
 use biome_parser::diagnostic::ParseDiagnostic;
-use biome_parser::lexer::{LexContext, Lexer, LexerCheckpoint, TokenFlags};
+use biome_parser::lexer::{
+    LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags,
+};
 use biome_unicode_table::{
     is_js_id_continue, is_js_id_start, lookup_byte,
     Dispatch::{self, *},
@@ -131,6 +133,9 @@ pub(crate) struct JsLexer<'src> {
 }

 impl<'src> Lexer<'src> for JsLexer<'src> {
+    const NEWLINE: Self::Kind = NEWLINE;
+    const WHITESPACE: Self::Kind = WHITESPACE;
+
     type Kind = JsSyntaxKind;
     type LexContext = JsLexContext;
     type ReLexContext = JsReLexContext;
@@ -158,18 +163,6 @@ impl<'src> Lexer<'src> for JsLexer<'src> {
         self.current_start
     }

-    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
-        LexerCheckpoint {
-            position: TextSize::from(self.position as u32),
-            current_start: self.current_start,
-            current_flags: self.current_flags,
-            current_kind: self.current_kind,
-            after_line_break: self.after_newline,
-            unicode_bom_length: self.unicode_bom_length,
-            diagnostics_pos: self.diagnostics.len() as u32,
-        }
-    }
-
     fn next_token(&mut self, context: Self::LexContext) -> Self::Kind {
         self.current_start = TextSize::from(self.position as u32);
         self.current_flags = TokenFlags::empty();
@@ -196,29 +189,6 @@ impl<'src> Lexer<'src> for JsLexer<'src> {
         kind
     }

-    fn re_lex(&mut self, context: Self::ReLexContext) -> Self::Kind {
-        let old_position = self.position;
-        self.position = u32::from(self.current_start) as usize;
-
-        let re_lexed_kind = match context {
-            JsReLexContext::Regex if matches!(self.current(), T![/] | T![/=]) => self.read_regex(),
-            JsReLexContext::BinaryOperator => self.re_lex_binary_operator(),
-            JsReLexContext::TypeArgumentLessThan => self.re_lex_type_argument_less_than(),
-            JsReLexContext::JsxIdentifier => self.re_lex_jsx_identifier(old_position),
-            JsReLexContext::JsxChild if !self.is_eof() => self.lex_jsx_child_token(),
-            _ => self.current(),
-        };
-
-        if self.current() == re_lexed_kind {
-            // Didn't re-lex anything. Return existing token again
-            self.position = old_position;
-        } else {
-            self.current_kind = re_lexed_kind;
-        }
-
-        re_lexed_kind
-    }
-
     fn has_preceding_line_break(&self) -> bool {
         self.current_flags.has_preceding_line_break()
     }
@@ -287,6 +257,45 @@ impl<'src> Lexer<'src> for JsLexer<'src> {
     }
 }

+impl<'src> ReLexer<'src> for JsLexer<'src> {
+    fn re_lex(&mut self, context: Self::ReLexContext) -> Self::Kind {
+        let old_position = self.position;
+        self.position = u32::from(self.current_start) as usize;
+
+        let re_lexed_kind = match context {
+            JsReLexContext::Regex if matches!(self.current(), T![/] | T![/=]) => self.read_regex(),
+            JsReLexContext::BinaryOperator => self.re_lex_binary_operator(),
+            JsReLexContext::TypeArgumentLessThan => self.re_lex_type_argument_less_than(),
+            JsReLexContext::JsxIdentifier => self.re_lex_jsx_identifier(old_position),
+            JsReLexContext::JsxChild if !self.is_eof() => self.lex_jsx_child_token(),
+            _ => self.current(),
+        };
+
+        if self.current() == re_lexed_kind {
+            // Didn't re-lex anything. Return existing token again
+            self.position = old_position;
+        } else {
+            self.current_kind = re_lexed_kind;
+        }
+
+        re_lexed_kind
+    }
+}
+
+impl<'src> LexerWithCheckpoint<'src> for JsLexer<'src> {
+    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
+        LexerCheckpoint {
+            position: TextSize::from(self.position as u32),
+            current_start: self.current_start,
+            current_flags: self.current_flags,
+            current_kind: self.current_kind,
+            after_line_break: self.after_newline,
+            unicode_bom_length: self.unicode_bom_length,
+            diagnostics_pos: self.diagnostics.len() as u32,
+        }
+    }
+}
+
 impl<'src> JsLexer<'src> {
     /// Make a new lexer from a str, this is safe because strs are valid utf8
     pub fn from_str(source: &'src str) -> Self {
@@ -1771,7 +1780,13 @@ impl<'src> JsLexer<'src> {
         let dispatched = lookup_byte(byte);

         match dispatched {
-            WHS => self.consume_newline_or_whitespaces(),
+            WHS => {
+                let kind = self.consume_newline_or_whitespaces();
+                if kind == Self::NEWLINE {
+                    self.after_newline = true;
+                }
+                kind
+            }
             EXL => self.resolve_bang(),
             HAS => self.read_shebang(),
             PRC => self.bin_or_assign(T![%], T![%=]),
@@ -1876,7 +1891,11 @@ impl<'src> JsLexer<'src> {
         if is_linebreak(chr)
             || (UNICODE_WHITESPACE_STARTS.contains(&byte) && UNICODE_SPACES.contains(&chr))
         {
-            self.consume_newline_or_whitespaces()
+            let kind = self.consume_newline_or_whitespaces();
+            if kind == Self::NEWLINE {
+                self.after_newline = true;
+            }
+            kind
         } else {
             self.advance(chr.len_utf8() - 1);
             if is_js_id_start(chr) {
diff --git a/crates/biome_parser/src/lexer.rs b/crates/biome_parser/src/lexer.rs
index 72539b070cb1..195696fb67bb 100644
--- a/crates/biome_parser/src/lexer.rs
+++ b/crates/biome_parser/src/lexer.rs
@@ -9,6 +9,12 @@ use unicode_bom::Bom;
 /// `Lexer` trait defines the necessary methods a lexer must implement.
 /// Lexer is responsible for dividing the source code into meaningful parsing units (kinds).
 pub trait Lexer<'src> {
+    /// The kind of the newline
+    const NEWLINE: Self::Kind;
+
+    /// The kind of the space
+    const WHITESPACE: Self::Kind;
+
     /// A kind of syntax, as identified by the lexer.
     type Kind: SyntaxKind;
     /// The specific context in which the lexer operates.
@@ -32,17 +38,9 @@ pub trait Lexer<'src> {

     fn current_start(&self) -> TextSize;

-    /// Creating a checkpoint of the lexer's current state.
-    /// `rewind` can be used later to restore the lexer to the checkpoint's state.
-    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind>;
-
     /// Tokenizes the next kind into a single coherent token within the given lexing context.
     fn next_token(&mut self, context: Self::LexContext) -> Self::Kind;

-    /// Re-lexes the current kind under a different lexing context.
-    /// Useful when a token can have different interpretations based on context.
-    fn re_lex(&mut self, context: Self::ReLexContext) -> Self::Kind;
-
     /// Returns `true` if the current kind is preceded by a line break.
     fn has_preceding_line_break(&self) -> bool;
@@ -69,7 +67,14 @@ pub trait Lexer<'src> {
     ///
     /// ## Safety
     /// Must be called at a valid UT8 char boundary
-    fn consume_newline_or_whitespaces(&mut self) -> Self::Kind;
+    fn consume_newline_or_whitespaces(&mut self) -> Self::Kind {
+        if self.consume_newline() {
+            Self::NEWLINE
+        } else {
+            self.consume_whitespaces();
+            Self::WHITESPACE
+        }
+    }

     /// Consumes all whitespace until a non-whitespace or a newline is found.
     ///
@@ -261,6 +266,18 @@ pub trait Lexer<'src> {
     }
 }

+pub trait ReLexer<'src>: Lexer<'src> + LexerWithCheckpoint<'src> {
+    /// Re-lexes the current kind under a different lexing context.
+    /// Useful when a token can have different interpretations based on context.
+    fn re_lex(&mut self, context: Self::ReLexContext) -> Self::Kind;
+}
+
+pub trait LexerWithCheckpoint<'src>: Lexer<'src> {
+    /// Creating a checkpoint of the lexer's current state.
+    /// `rewind` can be used later to restore the lexer to the checkpoint's state.
+    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind>;
+}
+
 /// `LexContext` is a trait that represents the context in
 /// which a parser is currently operating. It is used to specify
 /// and handle the variations in parsing requirements depending
@@ -407,16 +424,6 @@ impl<'l, Lex: Lexer<'l>> BufferedLexer<'l, Lex> {
         self.inner.source()
     }

-    /// Creates a checkpoint representing the current lexer state. Allows rewinding
-    /// the lexer to this position later on.
-    pub fn checkpoint(&self) -> LexerCheckpoint<Lex::Kind> {
-        if let Some(current) = &self.current {
-            current.clone()
-        } else {
-            self.inner.checkpoint()
-        }
-    }
-
     /// Rewinds the lexer to the state stored in the checkpoint.
     pub fn rewind(&mut self, checkpoint: LexerCheckpoint<Lex::Kind>) {
         self.inner.rewind(checkpoint);
@@ -431,8 +438,23 @@ impl<'l, Lex: Lexer<'l>> BufferedLexer<'l, Lex> {
         }
     }

+    /// Returns an iterator over the tokens following the current token to perform lookahead.
+    /// For example, what's the 3rd token after the current token?
+    #[inline(always)]
+    pub fn lookahead<'s>(&'s mut self) -> LookaheadIterator<'s, 'l, Lex> {
+        LookaheadIterator::new(self)
+    }
+
+    /// Consumes the buffered lexer and returns the lexing diagnostics
+    pub fn finish(self) -> Vec<ParseDiagnostic> {
+        self.inner.finish()
+    }
+}
+
+impl<'l, Lex> BufferedLexer<'l, Lex>
+where
+    Lex: ReLexer<'l>,
+{
     /// Re-lex the current token in the given context
-    /// See [Lexer::re_lex]
     pub fn re_lex(&mut self, context: Lex::ReLexContext) -> Lex::Kind {
         let current_kind = self.current();
         let current_checkpoint = self.inner.checkpoint();
@@ -455,17 +477,18 @@ impl<'l, Lex: Lexer<'l>> BufferedLexer<'l, Lex> {
         new_kind
     }
+}

-    /// Returns an iterator over the tokens following the current token to perform lookahead.
-    /// For example, what's the 3rd token after the current token?
-    #[inline(always)]
-    pub fn lookahead<'s>(&'s mut self) -> LookaheadIterator<'s, 'l, Lex> {
-        LookaheadIterator::new(self)
-    }
-
-    /// Consumes the buffered lexer and returns the lexing diagnostics
-    pub fn finish(self) -> Vec<ParseDiagnostic> {
-        self.inner.finish()
+impl<'l, Lex> BufferedLexer<'l, Lex>
+where
+    Lex: LexerWithCheckpoint<'l>,
+{
+    pub fn checkpoint(&self) -> LexerCheckpoint<Lex::Kind> {
+        if let Some(current) = &self.current {
+            current.clone()
+        } else {
+            self.inner.checkpoint()
+        }
     }
 }

@@ -484,7 +507,7 @@ impl<'l, 't, Lex: Lexer<'t>> LookaheadIterator<'l, 't, Lex> {
     }
 }

-impl<'l, 't, Lex: Lexer<'t>> Iterator for LookaheadIterator<'l, 't, Lex> {
+impl<'l, 't, Lex: LexerWithCheckpoint<'t>> Iterator for LookaheadIterator<'l, 't, Lex> {
     type Item = LookaheadToken<Lex::Kind>;

     #[inline]
@@ -524,7 +547,7 @@ impl<'l, 't, Lex: Lexer<'t>> Iterator for LookaheadIterator<'l, 't, Lex> {
     }
 }

-impl<'l, 't, Lex: Lexer<'t>> FusedIterator for LookaheadIterator<'l, 't, Lex> {}
+impl<'l, 't, Lex: LexerWithCheckpoint<'t>> FusedIterator for LookaheadIterator<'l, 't, Lex> {}

 #[derive(Debug)]
 pub struct LookaheadToken<Kind> {
@@ -553,7 +576,7 @@ impl<Kind> From<&LexerCheckpoint<Kind>> for LookaheadToken<Kind> {
 /// Stores the state of the lexer so that it may later be restored to that position.
 #[derive(Debug, Clone)]
 pub struct LexerCheckpoint<Kind> {
     pub position: TextSize,
     pub current_start: TextSize,
     pub current_kind: Kind,
diff --git a/crates/biome_parser/src/prelude.rs b/crates/biome_parser/src/prelude.rs
index f91f48823b28..02b140765705 100644
--- a/crates/biome_parser/src/prelude.rs
+++ b/crates/biome_parser/src/prelude.rs
@@ -1,6 +1,6 @@
 pub use crate::diagnostic::{ParseDiagnostic, ToDiagnostic};
 pub use crate::marker::{CompletedMarker, Marker};
 pub use crate::parsed_syntax::ParsedSyntax;
-pub use crate::token_source::{BumpWithContext, NthToken, TokenSource};
+pub use crate::token_source::{BumpWithContext, NthToken, TokenSource, Trivia};
 pub use crate::{token_set, TokenSet};
 pub use crate::{Parser, SyntaxFeature};
diff --git a/knope.toml b/knope.toml
index 8bd5af58f2e3..b7216a5a0fe2 100644
--- a/knope.toml
+++ b/knope.toml
@@ -135,3 +135,7 @@ changelog = "crates/biome_graphql_syntax/CHANGELOG.md"
 [packages.biome_graphql_factory]
 versioned_files = ["crates/biome_graphql_factory/Cargo.toml"]
 changelog = "crates/biome_graphql_factory/CHANGELOG.md"
+
+[packages.biome_html_parser]
+versioned_files = ["crates/biome_html_parser/Cargo.toml"]
+changelog = "crates/biome_html_parser/CHANGELOG.md"
diff --git a/xtask/codegen/html.ungram b/xtask/codegen/html.ungram
index 7c2ae13a5566..94d69d01a255 100644
--- a/xtask/codegen/html.ungram
+++ b/xtask/codegen/html.ungram
@@ -40,8 +40,8 @@ HtmlBogus = SyntaxElement*

 HtmlRoot =
 	bom: 'UNICODE_BOM'?
-	directive: HtmlDirective
-	tags: HtmlElementList
+	directive: HtmlDirective?
+	html: HtmlElement?
 	eof: 'EOF'

 //
@@ -50,7 +50,11 @@ HtmlRoot =
 HtmlDirective =
 	'<'
 	'!'
-	content: HtmlString
+	doctype: 'doctype'
+	html: 'html'?
+	quirk: 'html_literal'?
+	public_id: 'html_string_literal'?
+	system_id: 'html_string_literal'?
 	'>'

 // ==================================
diff --git a/xtask/codegen/src/html_kinds_src.rs b/xtask/codegen/src/html_kinds_src.rs
index e1bbc332a6e7..2748bf14918d 100644
--- a/xtask/codegen/src/html_kinds_src.rs
+++ b/xtask/codegen/src/html_kinds_src.rs
@@ -8,8 +8,8 @@ pub const HTML_KINDS_SRC: KindsSrc = KindsSrc {
         ("=", "EQ"),
         ("!", "BANG"),
     ],
-    keywords: &["null", "true", "false"],
-    literals: &["HTML_STRING_LITERAL"],
+    keywords: &["null", "true", "false", "doctype", "html"],
+    literals: &["HTML_STRING_LITERAL", "HTML_LITERAL"],
     tokens: &[
        "ERROR_TOKEN",
        "NEWLINE",
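
Reviewer note: a minimal sketch of how the entry point added by this patch is meant to be driven, written against only the API surface visible in the diff (`parse_html`, `HtmlParse::{diagnostics, has_errors, tree, syntax}`). The input mirrors the `tests/html_specs/ok/ok.html` fixture (a single newline); everything else here is illustrative, not part of the patch.

```rust
use biome_html_parser::parse_html;

fn main() {
    // Same input as the ok.html fixture: a document containing a single newline.
    let parse = parse_html("\n");

    // Lexer and parser diagnostics are merged into a single list by HtmlParser::finish.
    assert!(!parse.has_errors(), "{} diagnostics", parse.diagnostics().len());

    // `tree` unwrap-casts the root and panics on a kind mismatch;
    // `syntax` returns the untyped HtmlSyntaxNode instead.
    let root = parse.tree();
    println!("{root:#?}"); // HtmlRoot { bom_token: missing (optional), ... }
}
```

For repeated parses that should share interned nodes, `parse_html_with_cache` accepts a `&mut NodeCache`; `parse_html` is a thin wrapper that constructs a fresh `NodeCache::default()` on every call.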