diff --git a/crates/biome_html_formatter/src/comments.rs b/crates/biome_html_formatter/src/comments.rs
index b8b85008d06b..fec2bc969ebd 100644
--- a/crates/biome_html_formatter/src/comments.rs
+++ b/crates/biome_html_formatter/src/comments.rs
@@ -63,6 +63,21 @@ impl FormatRule> for FormatHtmlLeadingComment {
}
}
+/// Format rule whose `fmt` writes a comment's piece and nothing else.
+///
+/// NOTE(review): judging by the name this is the rule used for trailing comments — confirm
+/// where it is registered.
+#[derive(Default)]
+pub struct FormatHtmlTrailingComment;
+
+impl FormatRule> for FormatHtmlTrailingComment {
+ type Context = HtmlFormatContext;
+
+ /// Writes `comment.piece().as_piece()` without adding any spacing or line breaks around it.
+ fn fmt(
+ &self,
+ comment: &SourceComment,
+ f: &mut Formatter,
+ ) -> FormatResult<()> {
+ write!(f, [comment.piece().as_piece()])
+ }
+}
+
#[derive(Eq, PartialEq, Copy, Clone, Debug, Default)]
pub struct HtmlCommentStyle;
@@ -82,8 +97,8 @@ impl CommentStyle for HtmlCommentStyle {
fn place_comment(
&self,
- _comment: DecoratedComment,
+ comment: DecoratedComment,
) -> CommentPlacement {
- todo!();
+ // No custom placement logic: return the comment wrapped in `CommentPlacement::Default`.
+ CommentPlacement::Default(comment)
}
}
diff --git a/crates/biome_html_formatter/src/html/lists/element_list.rs b/crates/biome_html_formatter/src/html/lists/element_list.rs
index d687a63417df..c2a55b268bcd 100644
--- a/crates/biome_html_formatter/src/html/lists/element_list.rs
+++ b/crates/biome_html_formatter/src/html/lists/element_list.rs
@@ -1,10 +1,734 @@
-use crate::prelude::*;
-use biome_html_syntax::HtmlElementList;
+//! This implementation is very heavily inspired by the JSX formatter implementation for JsxChildList.
+
+use std::cell::RefCell;
+
+use crate::{
+ comments::HtmlComments,
+ prelude::*,
+ utils::children::{
+ html_split_children, is_meaningful_html_text, HtmlChild, HtmlChildrenIterator,
+ HtmlRawSpace, HtmlSpace,
+ },
+};
+use biome_formatter::{best_fitting, prelude::*, CstFormatContext};
+use biome_formatter::{format_args, write, VecBuffer};
+use biome_html_syntax::{AnyHtmlElement, HtmlElementList};
+use tag::GroupMode;
#[derive(Debug, Clone, Default)]
-pub(crate) struct FormatHtmlElementList;
+pub(crate) struct FormatHtmlElementList {
+ /// Requested layout for the children. `FormatHtmlElementList::layout` upgrades
+ /// `BestFitting` to `Multiline` when the children metadata requires it.
+ layout: HtmlChildListLayout,
+}
impl FormatRule for FormatHtmlElementList {
type Context = HtmlFormatContext;
fn fmt(&self, node: &HtmlElementList, f: &mut HtmlFormatter) -> FormatResult<()> {
- f.join().entries(node.iter().formatted()).finish()
+ // Nothing to write for an empty list.
+ if node.is_empty() {
+ return Ok(());
+ }
+
+ // `fmt_children` builds the children up front and reports whether they must be
+ // printed over multiple lines or whether a flat variant is available as well.
+ let result = self.fmt_children(node, f)?;
+ match result {
+ FormatChildrenResult::ForceMultiline(format_multiline) => {
+ write!(f, [format_multiline])
+ }
+ FormatChildrenResult::BestFitting {
+ flat_children,
+ expanded_children,
+ } => {
+ // Both variants are handed to `best_fitting!`, flat variant first.
+ write!(f, [best_fitting![flat_children, expanded_children]])
+ }
+ }
+ }
+}
+
+/// The result of formatting the children of an [HtmlElementList]. This is ultimately determined by [FormatHtmlElementList::layout].
+#[derive(Debug)]
+pub(crate) enum FormatChildrenResult {
+ /// Force the children to be formatted over multiple lines.
+ ///
+ /// For example:
+ /// ```html
+ /// <div>
+ ///   <div>1</div>
+ ///   <div>2</div>
+ /// </div>
+ /// ```
+ ///
+ /// This usually occurs when the children are already formatted over multiple lines, or when the children contains another tag.
+ ForceMultiline(FormatMultilineChildren),
+
+ /// Let the formatter determine whether the children should be formatted over multiple lines or if they can be kept on a single line.
+ BestFitting {
+ flat_children: FormatFlatChildren,
+ expanded_children: FormatMultilineChildren,
+ },
+}
+
+impl FormatHtmlElementList {
+ pub(crate) fn fmt_children(
+ &self,
+ list: &HtmlElementList,
+ f: &mut HtmlFormatter,
+ ) -> FormatResult {
+ self.disarm_debug_assertions(list, f);
+
+ let children_meta = self.children_meta(list, f.context().comments());
+ let layout = self.layout(children_meta);
+
+ let multiline_layout = if children_meta.meaningful_text {
+ MultilineLayout::Fill
+ } else {
+ MultilineLayout::NoFill
+ };
+
+ let mut flat = FlatBuilder::new();
+ let mut multiline = MultilineBuilder::new(multiline_layout);
+
+ let mut force_multiline = layout.is_multiline();
+
+ let mut children = html_split_children(list.iter(), f.context().comments())?;
+
+ // Trim trailing new lines
+ if let Some(HtmlChild::EmptyLine | HtmlChild::Newline) = children.last() {
+ children.pop();
+ }
+
+ let mut last: Option<&HtmlChild> = None;
+ let mut children_iter = HtmlChildrenIterator::new(children.iter());
+
+ // Trim leading new lines
+ if let Some(HtmlChild::Newline | HtmlChild::EmptyLine) = children_iter.peek() {
+ children_iter.next();
+ }
+
+ while let Some(child) = children_iter.next() {
+ let mut child_breaks = false;
+
+ match &child {
+ // A single word: Both `a` and `b` are a word in `a b` because they're separated by HTML Whitespace.
+ HtmlChild::Word(word) => {
+ let separator = match children_iter.peek() {
+ Some(HtmlChild::Word(_)) => {
+ // Separate words by a space or line break in extended mode
+ Some(WordSeparator::BetweenWords)
+ }
+
+ // Last word or last word before an element without any whitespace in between
+ Some(HtmlChild::NonText(next_child)) => Some(WordSeparator::EndOfText {
+ is_soft_line_break: !matches!(
+ next_child,
+ AnyHtmlElement::HtmlSelfClosingElement(_)
+ ) || word.is_single_character(),
+ }),
+
+ Some(HtmlChild::Newline | HtmlChild::Whitespace | HtmlChild::EmptyLine) => {
+ None
+ }
+
+ None => None,
+ };
+
+ child_breaks = separator.map_or(false, |separator| separator.will_break());
+
+ flat.write(&format_args![word, separator], f);
+
+ if let Some(separator) = separator {
+ multiline.write_with_separator(word, &separator, f);
+ } else {
+ // it's safe to write without a separator because None means that next element is a separator or end of the iterator
+ multiline.write_content(word, f);
+ }
+ }
+
+ // * Whitespace after the opening tag and before a meaningful text: `
`
+ let is_trailing_or_only_whitespace = children_iter.peek().is_none();
+
+ if is_trailing_or_only_whitespace || is_after_line_break {
+ multiline.write_separator(&HtmlRawSpace, f);
+ }
+ // Leading whitespace. Only possible if used together with an expression child
+ //
+ // ```
+ //
+ //
+ // {' '}
+ //
+ //
+ // ```
+ else if last.is_none() {
+ multiline.write_with_separator(&HtmlRawSpace, &hard_line_break(), f);
+ } else {
+ multiline.write_separator(&HtmlSpace, f);
+ }
+ }
+
+ // A new line between some HTML text and an element
+ HtmlChild::Newline => {
+ let is_soft_break = {
+ // Here we handle the case when we have a newline between a single-character word and an HTML element
+ // We need to use the previous and the next element
+ // [HtmlChild::Word, HtmlChild::Newline, HtmlChild::NonText]
+ // ```
+ //
+ //
First
,
+ //
Second
+ //
+ // ```
+ if let Some(HtmlChild::Word(word)) = last {
+ let is_next_element_self_closing = matches!(
+ children_iter.peek(),
+ Some(HtmlChild::NonText(AnyHtmlElement::HtmlSelfClosingElement(
+ _
+ )))
+ );
+ !is_next_element_self_closing && word.is_single_character()
+ }
+ // Here we handle the case when we have a single-character word between a new line and an HTML element
+ // Here we need to look ahead two elements
+ // [HtmlChild::Newline, HtmlChild::Word, HtmlChild::NonText]
+ // ```
+ //
+ //
First
+ // ,
Second
+ //
+ // ```
+ else if let Some(HtmlChild::Word(next_word)) = children_iter.peek() {
+ let next_next_element = children_iter.peek_next();
+ let is_next_next_element_new_line =
+ matches!(next_next_element, Some(HtmlChild::Newline));
+ let is_next_next_element_self_closing = matches!(
+ next_next_element,
+ Some(HtmlChild::NonText(AnyHtmlElement::HtmlSelfClosingElement(
+ _
+ )))
+ );
+ let has_new_line_and_self_closing = is_next_next_element_new_line
+ && matches!(
+ children_iter.peek_next_next(),
+ Some(HtmlChild::NonText(
+ AnyHtmlElement::HtmlSelfClosingElement(_)
+ ))
+ );
+
+ !has_new_line_and_self_closing
+ && !is_next_next_element_self_closing
+ && next_word.is_single_character()
+ } else {
+ false
+ }
+ };
+
+ if is_soft_break {
+ multiline.write_separator(&soft_line_break(), f);
+ } else {
+ child_breaks = true;
+ multiline.write_separator(&hard_line_break(), f);
+ }
+ }
+
+ // An empty line between some HTML text and an element
+ HtmlChild::EmptyLine => {
+ child_breaks = true;
+
+ // Additional empty lines are not preserved when any of
+ // the children are a meaningful text node.
+ //
+ // <>
+ //
+ // Third
+ // >
+ if children_meta.meaningful_text {
+ multiline.write_separator(&hard_line_break(), f);
+ } else {
+ multiline.write_separator(&empty_line(), f);
+ }
+ }
+
+ // Any child that isn't text
+ HtmlChild::NonText(non_text) => {
+ let line_mode = match children_iter.peek() {
+ Some(HtmlChild::Word(word)) => {
+ // Break if the current or next element is a self closing element
+ // ```javascript
+ // adefg
+ // ```
+ // Becomes
+ // ```javascript
+ //
+ // adefg
+ // ```
+ if matches!(non_text, AnyHtmlElement::HtmlSelfClosingElement(_))
+ && !word.is_single_character()
+ {
+ Some(LineMode::Hard)
+ } else {
+ Some(LineMode::Soft)
+ }
+ }
+
+ // Add a hard line break if what comes after the element is not a text or is all whitespace
+ Some(HtmlChild::NonText(_)) => Some(LineMode::Hard),
+
+ Some(HtmlChild::Newline | HtmlChild::Whitespace | HtmlChild::EmptyLine) => {
+ None
+ }
+ // Don't insert trailing line breaks
+ None => None,
+ };
+
+ child_breaks = line_mode.map_or(false, |mode| mode.is_hard());
+
+ let format_separator = line_mode.map(|mode| {
+ format_with(move |f| f.write_element(FormatElement::Line(mode)))
+ });
+
+ if force_multiline {
+ if let Some(format_separator) = format_separator {
+ multiline.write_with_separator(
+ &non_text.format(),
+ &format_separator,
+ f,
+ );
+ } else {
+ // it's safe to write without a separator because None means that next element is a separator or end of the iterator
+ multiline.write_content(&non_text.format(), f);
+ }
+ } else {
+ let mut memoized = non_text.format().memoized();
+
+ force_multiline = memoized.inspect(f)?.will_break();
+ flat.write(&format_args![memoized, format_separator], f);
+
+ if let Some(format_separator) = format_separator {
+ multiline.write_with_separator(&memoized, &format_separator, f);
+ } else {
+ // it's safe to write without a separator because None means that next element is a separator or end of the iterator
+ multiline.write_content(&memoized, f);
+ }
+ }
+ }
+ }
+
+ if child_breaks {
+ flat.disable();
+ force_multiline = true;
+ }
+
+ last = Some(child);
+ }
+
+ if force_multiline {
+ Ok(FormatChildrenResult::ForceMultiline(multiline.finish()?))
+ } else {
+ Ok(FormatChildrenResult::BestFitting {
+ flat_children: flat.finish()?,
+ expanded_children: multiline.finish()?,
+ })
+ }
+ }
+
+    /// Registers the value tokens of [HtmlContent] children as tracked and marks
+    /// their suppression comments as checked (they get ignored).
+    ///
+    /// This is necessary because the formatting of [HtmlElementList] bypasses the node formatting for
+    /// [HtmlContent] and instead formats the text itself.
+    ///
+    /// This variant is only compiled with `debug_assertions` enabled.
+    #[cfg(debug_assertions)]
+    fn disarm_debug_assertions(&self, node: &HtmlElementList, f: &mut HtmlFormatter) {
+        use biome_formatter::CstFormatContext;
+        use AnyHtmlElement::*;
+
+        for child in node {
+            if let HtmlContent(text) = child {
+                // A missing value token must not panic here: `html_split_children`
+                // reports it as an error through `?` later in `fmt_children`.
+                if let Ok(token) = text.value_token() {
+                    f.state_mut().track_token(&token);
+                }
+
+                // You can't suppress a text node
+                f.context()
+                    .comments()
+                    .mark_suppression_checked(text.syntax());
+            }
+        }
+    }
+
+    /// Counterpart for builds without `debug_assertions`: does nothing.
+    #[cfg(not(debug_assertions))]
+    fn disarm_debug_assertions(&self, _: &HtmlElementList, _: &mut HtmlFormatter) {}
+
+    /// Resolves the layout to use for the children described by `meta`.
+    ///
+    /// A configured `Multiline` layout is always kept. A configured `BestFitting` layout
+    /// is kept only when `meta` reports neither a tag nor multiple expressions; otherwise
+    /// it becomes `Multiline`.
+    fn layout(&self, meta: ChildrenMeta) -> HtmlChildListLayout {
+        let must_expand = meta.any_tag || meta.multiple_expressions;
+
+        match self.layout {
+            HtmlChildListLayout::BestFitting if !must_expand => HtmlChildListLayout::BestFitting,
+            HtmlChildListLayout::BestFitting | HtmlChildListLayout::Multiline => {
+                HtmlChildListLayout::Multiline
+            }
+        }
+    }
+
+    /// Walks the children once and collects the facts the layout decision needs:
+    /// whether any child is an element and whether any text child is meaningful.
+    fn children_meta(&self, list: &HtmlElementList, _comments: &HtmlComments) -> ChildrenMeta {
+        use AnyHtmlElement::*;
+
+        let mut summary = ChildrenMeta::default();
+
+        for child in list {
+            match child {
+                HtmlElement(_) | HtmlSelfClosingElement(_) => summary.any_tag = true,
+                // Once meaningful text has been seen there is no need to inspect further text.
+                HtmlContent(text) if !summary.meaningful_text => {
+                    summary.meaningful_text = text
+                        .value_token()
+                        .map_or(false, |token| is_meaningful_html_text(token.text()));
+                }
+                _ => {}
+            }
+        }
+
+        summary
+    }
+}
+
+/// Layout strategy for the children of an element list.
+#[derive(Debug, Default, Copy, Clone)]
+pub enum HtmlChildListLayout {
+    /// Keep the children on a single line when that is possible.
+    #[default]
+    BestFitting,
+
+    /// Always spread the children over multiple lines.
+    Multiline,
+}
+
+impl HtmlChildListLayout {
+    /// Returns `true` only for [HtmlChildListLayout::Multiline].
+    const fn is_multiline(&self) -> bool {
+        match self {
+            Self::Multiline => true,
+            Self::BestFitting => false,
+        }
+    }
+}
+
+/// Facts about a list of children, collected by `children_meta` and consumed by `layout`
+/// and `fmt_children`.
+#[derive(Copy, Clone, Debug, Default)]
+struct ChildrenMeta {
+ /// `true` if the children contain an `HtmlElement` or an `HtmlSelfClosingElement`.
+ any_tag: bool,
+
+ /// Forces the multiline layout when `true`.
+ /// NOTE(review): `children_meta` never sets this flag, so it keeps its default of
+ /// `false` there — looks like a leftover from the JSX implementation; confirm.
+ multiple_expressions: bool,
+
+ /// `true` if any `HtmlContent` child has text that `is_meaningful_html_text` accepts.
+ meaningful_text: bool,
+}
+
+#[derive(Copy, Clone, Debug)]
+enum WordSeparator {
+ /// Separator between two words. Creates a soft line break or space.
+ ///
+ /// `a b`
+ BetweenWords,
+
+ /// A separator of a word at the end of a [HtmlText] element. Either because it is the last
+ /// child in its parent OR it is right before the start of another child (element, expression, ...).
+ ///
+ /// ```javascript
+ /// <div>a</div>; // last element of parent
+ /// <div>a<other /></div> // last element before another element
+ /// <div>a{expression}</div> // last element before expression
+ /// ```
+ ///
+ /// Creates a soft line break EXCEPT if the next element is a self closing element
+ /// or the previous word was an ascii punctuation, which results in a hard line break:
+ ///
+ /// ```javascript
+ /// a = <div>ab<br/></div>;
+ ///
+ /// // becomes
+ ///
+ /// a = (
+ ///     <div>
+ ///         ab
+ ///         <br />
+ ///     </div>
+ /// );
+ /// ```
+ EndOfText { is_soft_line_break: bool },
+}
+
+impl WordSeparator {
+    /// Tells whether writing this separator forces the surrounding children to expand.
+    ///
+    /// That is only the case for an end-of-text separator that is not a soft line break.
+    fn will_break(&self) -> bool {
+        match self {
+            WordSeparator::EndOfText { is_soft_line_break } => !*is_soft_line_break,
+            WordSeparator::BetweenWords => false,
+        }
+    }
+}
+
+impl Format for WordSeparator {
+ /// Writes the line element for this separator: `soft_line_break_or_space` between two
+ /// words, and at the end of a text either a soft or a hard line break depending on
+ /// `is_soft_line_break`.
+ fn fmt(&self, f: &mut Formatter) -> FormatResult<()> {
+ match self {
+ WordSeparator::BetweenWords => soft_line_break_or_space().fmt(f),
+ WordSeparator::EndOfText { is_soft_line_break } => {
+ if *is_soft_line_break {
+ soft_line_break().fmt(f)
+ }
+ // ```javascript
+ //
+ // ```
+ else {
+ hard_line_break().fmt(f)
+ }
+ }
+ }
+ }
+}
+
+/// Selects how the multiline children are emitted. `fmt_children` picks `Fill` when the
+/// children contain meaningful text and `NoFill` otherwise.
+#[derive(Copy, Clone, Debug, Default)]
+enum MultilineLayout {
+ /// Every written item is wrapped in `StartEntry`/`EndEntry` tags and the whole list in
+ /// `StartFill`/`EndFill` tags.
+ Fill,
+ /// Items are written directly and the whole list is wrapped in a group with
+ /// `GroupMode::Expand`.
+ #[default]
+ NoFill,
+}
+
+/// Builder that helps to create the output for the multiline layout.
+///
+/// The multiline layout may use [FormatElement::Fill] element that requires that its children
+/// are an alternating sequence of `[element, separator, element, separator, ...]`.
+///
+/// This requires that each element is wrapped inside of a list if it emits more than one element to uphold
+/// the constraints of [FormatElement::Fill].
+///
+/// However, the wrapping is only necessary for [MultilineLayout::Fill] for when the [FormatElement::Fill] element is used.
+///
+/// This builder takes care of doing the least amount of work necessary for the chosen layout while also guaranteeing
+/// that the written element is valid
+#[derive(Debug, Clone)]
+struct MultilineBuilder {
+ /// Layout the written elements are produced for.
+ layout: MultilineLayout,
+ /// Elements written so far, or the first error hit while writing.
+ result: FormatResult>,
+}
+
+impl MultilineBuilder {
+    /// Creates an empty builder that emits elements for `layout`.
+    fn new(layout: MultilineLayout) -> Self {
+        Self {
+            layout,
+            result: Ok(Vec::new()),
+        }
+    }
+
+    /// Formats an element that does not require a separator
+    /// It is safe to omit the separator because at the call side we must guarantee that we have reached the end of the iterator
+    /// or the next element is a space/newline that should be written into the separator "slot".
+    fn write_content(&mut self, content: &dyn Format, f: &mut HtmlFormatter) {
+        self.write(content, None, f);
+    }
+
+    /// Formatting a separator does not require any element in the separator slot
+    fn write_separator(
+        &mut self,
+        separator: &dyn Format,
+        f: &mut HtmlFormatter,
+    ) {
+        self.write(separator, None, f);
+    }
+
+    /// Writes `content` followed by `separator`.
+    fn write_with_separator(
+        &mut self,
+        content: &dyn Format,
+        separator: &dyn Format,
+        f: &mut HtmlFormatter,
+    ) {
+        self.write(content, Some(separator), f);
+    }
+
+    /// Appends `content` and, when given, `separator` to the accumulated elements.
+    ///
+    /// Once a write has failed, the error is kept in `self.result` and all later
+    /// writes are skipped.
+    fn write(
+        &mut self,
+        content: &dyn Format,
+        separator: Option<&dyn Format>,
+        f: &mut HtmlFormatter,
+    ) {
+        let result = std::mem::replace(&mut self.result, Ok(Vec::new()));
+
+        self.result = result.and_then(|elements| {
+            let elements = {
+                let mut buffer = VecBuffer::new_with_vec(f.state_mut(), elements);
+                match self.layout {
+                    MultilineLayout::Fill => {
+                        // Make sure that the separator and content only ever write a single element
+                        buffer.write_element(FormatElement::Tag(Tag::StartEntry))?;
+                        write!(buffer, [content])?;
+                        buffer.write_element(FormatElement::Tag(Tag::EndEntry))?;
+
+                        if let Some(separator) = separator {
+                            buffer.write_element(FormatElement::Tag(Tag::StartEntry))?;
+                            write!(buffer, [separator])?;
+                            buffer.write_element(FormatElement::Tag(Tag::EndEntry))?;
+                        }
+                    }
+                    MultilineLayout::NoFill => {
+                        // The optional separator is written as part of this call, so it
+                        // must not be written a second time afterwards (the `Fill`
+                        // branch writes it exactly once as well).
+                        write!(buffer, [content, separator])?;
+                    }
+                };
+                buffer.into_vec()
+            };
+            Ok(elements)
+        })
+    }
+
+    /// Consumes the builder and returns the formattable children, or the first
+    /// error recorded by an earlier write.
+    fn finish(self) -> FormatResult {
+        Ok(FormatMultilineChildren {
+            layout: self.layout,
+            elements: RefCell::new(self.result?),
+        })
+    }
+}
+
+/// The multiline variant of the children, produced by `MultilineBuilder::finish`.
+#[derive(Debug)]
+pub(crate) struct FormatMultilineChildren {
+ /// Decides which tags wrap the elements when they are written.
+ layout: MultilineLayout,
+ /// The pre-built elements; `fmt` takes them out of the cell.
+ elements: RefCell>,
+}
+
+impl Format for FormatMultilineChildren {
+ /// Writes the stored elements inside `group(block_indent(..))`, wrapped in fill tags
+ /// for `Fill` or in an expanded group for `NoFill`.
+ fn fmt(&self, f: &mut Formatter) -> FormatResult<()> {
+ let format_inner = format_once(|f| {
+ if let Some(elements) = f.intern_vec(self.elements.take()) {
+ match self.layout {
+ MultilineLayout::Fill => f.write_elements([
+ FormatElement::Tag(Tag::StartFill),
+ elements,
+ FormatElement::Tag(Tag::EndFill),
+ ])?,
+ MultilineLayout::NoFill => f.write_elements([
+ FormatElement::Tag(Tag::StartGroup(
+ tag::Group::new().with_mode(GroupMode::Expand),
+ )),
+ elements,
+ FormatElement::Tag(Tag::EndGroup),
+ ])?,
+ };
+ }
+
+ Ok(())
+ });
+
+ // This indent is wrapped with a group to ensure that the print mode is
+ // set to `Expanded` when the group prints and will guarantee that the
+ // content _does not_ fit when printed as part of a `Fill`. Example:
+ //
+ //
+ //
+ // {" "}
+ // ({variable})
+ //
+ // The `...` is the element that gets wrapped in the group
+ // by this line. Importantly, it contains a hard line break, and because
+ // [FitsMeasurer::fits_element] considers all hard lines as `Fits::Yes`,
+ // it will cause the element and the following separator to be printed
+ // in flat mode due to the logic of `Fill`. But because we know the
+ // item breaks over multiple lines, we want it to _not_ fit and print
+ // both the content and the separator in Expanded mode, keeping the
+ // formatting as shown above.
+ //
+ // The `group` here allows us to opt-in to telling the `FitsMeasurer`
+ // that content that breaks shouldn't be considered flat and should be
+ // expanded. This is in contrast to something like a concise array fill,
+ // which _does_ allow breaks to fit and preserves density.
+ write!(f, [group(&block_indent(&format_inner))])
+ }
+}
+
+/// Accumulates the elements of the flat (single-line) variant of the children.
+/// It can be disabled, after which writes are ignored and `finish` must not be called.
+#[derive(Debug)]
+struct FlatBuilder {
+ /// Elements written so far, or the first error hit while writing.
+ result: FormatResult>,
+ /// Set by `disable`; once `true`, `write` returns without doing anything.
+ disabled: bool,
+}
+
+impl FlatBuilder {
+ /// Creates an enabled builder with no elements.
+ fn new() -> Self {
+ Self {
+ result: Ok(Vec::new()),
+ disabled: false,
+ }
+ }
+
+ /// Appends `content` to the accumulated elements unless the builder is disabled.
+ /// After a failed write the error is kept and later writes are skipped.
+ fn write(&mut self, content: &dyn Format, f: &mut HtmlFormatter) {
+ if self.disabled {
+ return;
+ }
+
+ // Move the current result out so the vector can be handed to the buffer by value.
+ let result = std::mem::replace(&mut self.result, Ok(Vec::new()));
+
+ self.result = result.and_then(|elements| {
+ let mut buffer = VecBuffer::new_with_vec(f.state_mut(), elements);
+
+ write!(buffer, [content])?;
+
+ Ok(buffer.into_vec())
+ })
+ }
+
+ /// Stops recording; elements written afterwards are dropped.
+ fn disable(&mut self) {
+ self.disabled = true;
+ }
+
+ /// Returns the flat children, or the first error recorded by an earlier write.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `disable` has been called on this builder.
+ fn finish(self) -> FormatResult {
+ assert!(!self.disabled, "The flat builder has been disabled and thus, does no longer store any elements. Make sure you don't call disable if you later intend to format the flat content.");
+
+ Ok(FormatFlatChildren {
+ elements: RefCell::new(self.result?),
+ })
+ }
+}
+
+/// The flat variant of the children, produced by `FlatBuilder::finish`.
+#[derive(Debug)]
+pub(crate) struct FormatFlatChildren {
+ /// The pre-built elements; `fmt` takes them out of the cell.
+ elements: RefCell>,
+}
+
+impl Format for FormatFlatChildren {
+ /// Writes the stored elements as a single interned element, if `intern_vec` returns one.
+ fn fmt(&self, f: &mut Formatter) -> FormatResult<()> {
+ // `take` moves the elements out and leaves an empty vector in the cell.
+ // NOTE(review): presumably `intern_vec` returns `None` for an empty vector — confirm.
+ if let Some(elements) = f.intern_vec(self.elements.take()) {
+ f.write_element(elements)?;
+ }
+ Ok(())
}
}
diff --git a/crates/biome_html_formatter/src/lib.rs b/crates/biome_html_formatter/src/lib.rs
index 3b68dca55214..4d3ffa266564 100644
--- a/crates/biome_html_formatter/src/lib.rs
+++ b/crates/biome_html_formatter/src/lib.rs
@@ -14,6 +14,7 @@ mod cst;
mod generated;
mod html;
pub(crate) mod prelude;
+pub mod utils;
/// Formats a Html file based on its features.
///
diff --git a/crates/biome_html_formatter/src/utils/children.rs b/crates/biome_html_formatter/src/utils/children.rs
new file mode 100644
index 000000000000..824d6476a965
--- /dev/null
+++ b/crates/biome_html_formatter/src/utils/children.rs
@@ -0,0 +1,425 @@
+use std::{
+ iter::{FusedIterator, Peekable},
+ str::Chars,
+};
+
+use biome_formatter::{
+ format_args, prelude::*, write, Buffer, Format, FormatElement, FormatResult,
+};
+use biome_html_syntax::AnyHtmlElement;
+use biome_rowan::{SyntaxResult, TextLen, TextRange, TextSize, TokenText};
+
+use crate::{comments::HtmlComments, context::HtmlFormatContext, HtmlFormatter};
+
+pub(crate) static HTML_WHITESPACE_CHARS: [char; 4] = [' ', '\n', '\t', '\r'];
+
+/// Decides whether `text` counts as meaningful HTML text.
+///
+/// Text is meaningful when it contains at least one non-whitespace character, or when
+/// it contains no newline at all. Only the characters listed in `HTML_WHITESPACE_CHARS`
+/// (space, `\n`, `\t`, `\r`) count as whitespace.
+///
+/// ```
+/// use biome_html_formatter::utils::children::is_meaningful_html_text;
+///
+/// assert_eq!(is_meaningful_html_text(" \t\r "), true);
+/// assert_eq!(is_meaningful_html_text(" \n\r "), false);
+/// assert_eq!(is_meaningful_html_text(" Alien "), true);
+/// assert_eq!(is_meaningful_html_text("\n Alien "), true);
+/// assert_eq!(is_meaningful_html_text(" Alien \n"), true);
+/// assert_eq!(is_meaningful_html_text(""), true);
+/// ```
+pub fn is_meaningful_html_text(text: &str) -> bool {
+    let has_visible_char = text.chars().any(|c| !HTML_WHITESPACE_CHARS.contains(&c));
+
+    // Whitespace-only text stays meaningful as long as it does not span a line break.
+    has_visible_char || !text.contains('\n')
+}
+
+/// A single word of an HTML text: a run of characters that contains no HTML whitespace.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub(crate) struct HtmlWord {
+    /// The characters of the word.
+    text: TokenText,
+    /// Position stored alongside the text for this word.
+    source_position: TextSize,
+}
+
+impl HtmlWord {
+    /// Creates a word from its text and its position.
+    fn new(text: TokenText, source_position: TextSize) -> Self {
+        Self {
+            text,
+            source_position,
+        }
+    }
+
+    /// Returns `true` when the word consists of exactly one `char`.
+    pub(crate) fn is_single_character(&self) -> bool {
+        let mut chars = self.text.chars();
+        matches!((chars.next(), chars.next()), (Some(_), None))
+    }
+}
+
+impl Format for HtmlWord {
+ /// Writes the word as a `FormatElement::LocatedTokenText` carrying the word's
+ /// `source_position` and a clone of its text.
+ fn fmt(&self, f: &mut Formatter) -> FormatResult<()> {
+ f.write_element(FormatElement::LocatedTokenText {
+ source_position: self.source_position,
+ slice: self.text.clone(),
+ })
+ }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub(crate) enum HtmlChild {
+ /// A Single word in a HTML text. For example, the words for `a b\nc` are `[a, b, c]`
+ Word(HtmlWord),
+
+ /// A ` ` whitespace
+ ///
+ /// ```html
+ ///
+ ///
a
+ ///
a
+ ///
a
+ /// b
+ /// ```
+ ///
+ /// Whitespace between two words is not represented as whitespace
+ /// ```javascript
+ ///
a b
+ /// ```
+ /// The space between `a` and `b` is not considered a whitespace.
+ Whitespace,
+
+ /// A new line at the start or end of a [HtmlText] with meaningful content. (that isn't all whitespace
+ /// and contains a new line).
+ ///
+ /// ```html
+ ///
+ /// a
+ ///
+ /// ```
+ Newline,
+
+ /// A [HtmlText] that only consists of whitespace and has at least two line breaks;
+ ///
+ /// ```html
+ ///
+ ///
+ ///
+ ///
+ /// ```
+ ///
+ /// The text between `
` and `` is an empty line text.
+ EmptyLine,
+
+ /// Any other content that isn't a text. Should be formatted as is.
+ NonText(AnyHtmlElement),
+}
+
+impl HtmlChild {
+    /// Returns `true` for the two line-break children, [HtmlChild::Newline] and
+    /// [HtmlChild::EmptyLine], and `false` for every other child.
+    pub(crate) const fn is_any_line(&self) -> bool {
+        match self {
+            HtmlChild::Newline | HtmlChild::EmptyLine => true,
+            HtmlChild::Word(_) | HtmlChild::Whitespace | HtmlChild::NonText(_) => false,
+        }
+    }
+}
+
+/// Creates either a space using an expression child and a string literal,
+/// or a regular space, depending on whether the group breaks or not.
+///
+/// ```html
+///
Winter Light
;
+///
+///
+/// Winter Light
+/// Through A Glass Darkly
+/// The Silence
+/// Seventh Seal
+/// Wild Strawberries
+///
+/// ```
+#[derive(Default)]
+pub(crate) struct HtmlSpace;
+
+impl Format for HtmlSpace {
+ /// Writes two alternatives: `HtmlRawSpace` followed by a soft line break under
+ /// `if_group_breaks`, and a plain `space()` under `if_group_fits_on_line`.
+ fn fmt(&self, formatter: &mut HtmlFormatter) -> FormatResult<()> {
+ write![
+ formatter,
+ [
+ if_group_breaks(&format_args![HtmlRawSpace, soft_line_break()]),
+ if_group_fits_on_line(&space())
+ ]
+ ]
+ }
+}
+
+/// Formats as a single literal space written with `text(" ")`.
+pub(crate) struct HtmlRawSpace;
+
+impl Format for HtmlRawSpace {
+ /// Writes the one-character text `" "`.
+ fn fmt(&self, f: &mut Formatter) -> FormatResult<()> {
+ write!(f, [text(" ")])
+ }
+}
+
+/// Flattens `children` into a list of [HtmlChild] entries.
+///
+/// `HtmlContent` nodes are split into words, and their leading/trailing whitespace is
+/// recorded as `Whitespace`, `Newline` or `EmptyLine` entries; every other node becomes
+/// a `NonText` entry. Entries go through `HtmlSplitChildrenBuilder`, which collapses
+/// consecutive separators.
+///
+/// Returns an error if the value token of an `HtmlContent` node is missing.
+/// `_comments` is currently unused.
+pub(crate) fn html_split_children(
+ children: I,
+ _comments: &HtmlComments,
+) -> SyntaxResult>
+where
+ I: IntoIterator,
+{
+ let mut builder = HtmlSplitChildrenBuilder::new();
+
+ for child in children {
+ match child {
+ AnyHtmlElement::HtmlContent(text) => {
+ // Split the text into words
+ // Keep track if there's any leading/trailing empty line, new line or whitespace
+
+ let value_token = text.value_token()?;
+ let mut chunks = HtmlSplitChunksIterator::new(value_token.text()).peekable();
+
+ // Text starting with a whitespace
+ if let Some((_, HtmlTextChunk::Whitespace(_whitespace))) = chunks.peek() {
+ match chunks.next() {
+ Some((_, HtmlTextChunk::Whitespace(whitespace))) => {
+ if whitespace.contains('\n') {
+ if chunks.peek().is_none() {
+ // A text only consisting of whitespace that also contains a new line isn't considered meaningful text.
+ // It can be entirely removed from the content without changing the semantics.
+ let newlines =
+ whitespace.chars().filter(|c| *c == '\n').count();
+
+ // Keep up to one blank line between tags/expressions and text.
+ // ```javascript
+ //
+ //
+ //
+ //
+ // ```
+ if newlines > 1 {
+ builder.entry(HtmlChild::EmptyLine);
+ }
+
+ // The whole text was whitespace: nothing else to split.
+ continue;
+ }
+
+ builder.entry(HtmlChild::Newline)
+ } else {
+ builder.entry(HtmlChild::Whitespace)
+ }
+ }
+ // The peek above already established that the next chunk is whitespace.
+ _ => unreachable!(),
+ }
+ }
+
+ while let Some(chunk) = chunks.next() {
+ match chunk {
+ (_, HtmlTextChunk::Whitespace(whitespace)) => {
+ // Only handle trailing whitespace. Words must always be joined by new lines
+ if chunks.peek().is_none() {
+ if whitespace.contains('\n') {
+ builder.entry(HtmlChild::Newline);
+ } else {
+ builder.entry(HtmlChild::Whitespace)
+ }
+ }
+ }
+
+ (relative_start, HtmlTextChunk::Word(word)) => {
+ // `relative_start` is the chunk's offset inside the token text; adding the
+ // token's start gives the position stored on the word.
+ let text = value_token
+ .token_text()
+ .slice(TextRange::at(relative_start, word.text_len()));
+ let source_position = value_token.text_range().start() + relative_start;
+
+ builder.entry(HtmlChild::Word(HtmlWord::new(text, source_position)));
+ }
+ }
+ }
+ }
+ child => {
+ builder.entry(HtmlChild::NonText(child));
+ }
+ }
+ }
+
+ Ok(builder.finish())
+}
+
+/// Collects [HtmlChild] entries while collapsing consecutive separators
+/// ([HtmlChild::EmptyLine], [HtmlChild::Newline], [HtmlChild::Whitespace]):
+/// 1. If the last entry is a separator and the new entry is [HtmlChild::Whitespace], the new entry replaces the last one.
+/// 2. If the last entry is a separator and the new entry is [HtmlChild::EmptyLine] or [HtmlChild::Newline], the new entry is dropped.
+///
+/// [Prettier applies]: https://github.com/prettier/prettier/blob/b0d9387b95cdd4e9d50f5999d3be53b0b5d03a97/src/language-js/print/jsx.js#L144-L180
+#[derive(Debug)]
+struct HtmlSplitChildrenBuilder {
+ /// Entries accepted so far, in order.
+ buffer: Vec,
+}
+
+impl HtmlSplitChildrenBuilder {
+ /// Creates a builder with no entries.
+ fn new() -> Self {
+ HtmlSplitChildrenBuilder { buffer: vec![] }
+ }
+
+ /// Adds `child`, applying the separator-collapsing rules described on the type.
+ fn entry(&mut self, child: HtmlChild) {
+ match self.buffer.last_mut() {
+ Some(last @ (HtmlChild::EmptyLine | HtmlChild::Newline | HtmlChild::Whitespace)) => {
+ if matches!(child, HtmlChild::Whitespace) {
+ *last = child;
+ } else if matches!(child, HtmlChild::NonText(_) | HtmlChild::Word(_)) {
+ self.buffer.push(child);
+ }
+ // Otherwise `child` is a second line separator and is dropped.
+ }
+ _ => self.buffer.push(child),
+ }
+ }
+
+ /// Returns the collected entries.
+ fn finish(self) -> Vec {
+ self.buffer
+ }
+}
+
+/// A slice of a text produced by `HtmlSplitChunksIterator`.
+#[derive(Eq, PartialEq, Copy, Clone, Debug)]
+enum HtmlTextChunk<'a> {
+ /// A run consisting only of ` `, `\n`, `\t` or `\r` characters.
+ Whitespace(&'a str),
+ /// A run containing none of those characters.
+ Word(&'a str),
+}
+
+/// Splits a text into whitespace only and non-whitespace chunks.
+///
+/// See `jsx_split_chunks_iterator` test for examples
+struct HtmlSplitChunksIterator<'a> {
+ /// Offset into `text` of the first character that has not been consumed yet.
+ position: TextSize,
+ /// The full text being split; chunks are slices of it.
+ text: &'a str,
+ /// Character cursor over `text`, peekable to detect where a chunk ends.
+ chars: Peekable>,
+}
+
+impl<'a> HtmlSplitChunksIterator<'a> {
+ /// Creates an iterator positioned at the start of `text`.
+ fn new(text: &'a str) -> Self {
+ Self {
+ position: TextSize::default(),
+ text,
+ chars: text.chars().peekable(),
+ }
+ }
+}
+
+impl<'a> Iterator for HtmlSplitChunksIterator<'a> {
+ /// The chunk together with its start offset inside the text.
+ type Item = (TextSize, HtmlTextChunk<'a>);
+
+ /// Returns the next maximal run of characters that are either all whitespace
+ /// (` `, `\n`, `\t`, `\r`) or all non-whitespace, or `None` at the end of the text.
+ fn next(&mut self) -> Option {
+ let char = self.chars.next()?;
+
+ let start = self.position;
+ self.position += char.text_len();
+
+ // The first character decides the kind of the whole chunk.
+ let is_whitespace = matches!(char, ' ' | '\n' | '\t' | '\r');
+
+ // Extend the chunk while the following characters are of the same kind.
+ while let Some(next) = self.chars.peek() {
+ let next_is_whitespace = matches!(next, ' ' | '\n' | '\t' | '\r');
+
+ if is_whitespace != next_is_whitespace {
+ break;
+ }
+
+ self.position += next.text_len();
+ self.chars.next();
+ }
+
+ let range = TextRange::new(start, self.position);
+ let slice = &self.text[range];
+
+ let chunk = if is_whitespace {
+ HtmlTextChunk::Whitespace(slice)
+ } else {
+ HtmlTextChunk::Word(slice)
+ };
+
+ Some((start, chunk))
+ }
+}
+
+// `next` keeps returning `None` once the underlying `Chars` is exhausted.
+impl FusedIterator for HtmlSplitChunksIterator<'_> {}
+
+/// An iterator adaptor that allows a lookahead of three tokens
+///
+/// # Examples
+/// ```
+/// use biome_html_formatter::utils::children::HtmlChildrenIterator;
+///
+/// let buffer = vec![1, 2, 3, 4];
+///
+/// let mut iter = HtmlChildrenIterator::new(buffer.iter());
+///
+/// assert_eq!(iter.peek(), Some(&&1));
+/// assert_eq!(iter.peek_next(), Some(&&2));
+/// assert_eq!(iter.peek_next_next(), Some(&&3));
+/// assert_eq!(iter.next(), Some(&1));
+/// assert_eq!(iter.next(), Some(&2));
+/// assert_eq!(iter.next(), Some(&3));
+/// ```
+#[derive(Clone, Debug)]
+pub struct HtmlChildrenIterator {
+ iter: I,
+
+ peeked: Option