Skip to content

Commit

Permalink
rewrite parser, add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
hafihaf123 committed Feb 18, 2025
1 parent 00468dc commit 0c96902
Show file tree
Hide file tree
Showing 4 changed files with 288 additions and 64 deletions.
173 changes: 173 additions & 0 deletions rama-http/src/headers/x_robots_tag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,176 @@ impl FromIterator<RobotsTag> for XRobotsTag {
Self(iter.into_iter().collect())
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::headers::x_robots_tag_components::MaxImagePreviewSetting;
    use chrono::{DateTime, Utc};

    // Generates a `#[test]` function named `$test_name`.
    //
    // Every raw byte string yielded by `$raw_values` is converted into a `HeaderValue`
    // (panicking if the bytes are not a valid header value), the resulting values are
    // decoded as an `XRobotsTag`, and the outcome -- `None` when decoding fails -- is
    // compared against `$want`.
    macro_rules! test_header {
        ($test_name: ident, $raw_values: expr, $want: expr) => {
            #[test]
            fn $test_name() {
                let header_values: Vec<_> = $raw_values
                    .into_iter()
                    .map(|raw| HeaderValue::from_bytes(raw).unwrap())
                    .collect();
                let decoded = XRobotsTag::decode(&mut header_values.iter()).ok();
                assert_eq!(decoded, $want);
            }
        };
    }

    // A single simple rule without a bot name.
    test_header!(
        one_rule,
        vec![b"noindex"],
        Some(XRobotsTag(vec![RobotsTag::builder().no_index().build()]))
    );

    // A single `key: value` rule without a bot name.
    test_header!(
        one_composite_rule,
        vec![b"max-snippet: 2025"],
        Some(XRobotsTag(vec![
            RobotsTag::builder().max_snippet(2025).build()
        ]))
    );

    // Several comma-separated simple rules end up in one tag.
    test_header!(
        multiple_rules,
        vec![b"noindex, nofollow, nosnippet"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .no_index()
                .no_follow()
                .no_snippet()
                .build()
        ]))
    );

    // Simple and `key: value` rules mixed; `max-video-preview: -1` is expected as `None`.
    test_header!(
        multiple_rules_with_composite,
        vec![b"max-video-preview: -1, noindex, nofollow, max-snippet: 2025, max-image-preview: standard"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .max_video_preview(None)
                .no_index()
                .no_follow()
                .max_snippet(2025)
                .max_image_preview(MaxImagePreviewSetting::Standard)
                .build()
        ]))
    );

    // A bot name followed by one simple rule.
    test_header!(
        one_bot_one_rule,
        vec![b"google_bot: noindex"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .bot_name("google_bot".parse().unwrap())
                .no_index()
                .build()
        ]))
    );

    // A bot name followed by one `key: value` rule.
    test_header!(
        one_bot_one_composite_rule,
        vec![b"google_bot: max-video-preview: 0"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .bot_name("google_bot".parse().unwrap())
                .max_video_preview(Some(0))
                .build()
        ]))
    );

    // A bot name followed by several simple rules.
    test_header!(
        one_bot_multiple_rules,
        vec![b"google_bot: noindex, nosnippet"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .bot_name("google_bot".parse().unwrap())
                .no_index()
                .no_snippet()
                .build()
        ]))
    );

    // A bot name followed by a mix of simple and `key: value` rules.
    test_header!(
        one_bot_multiple_rules_with_composite,
        vec![b"google_bot: max-video-preview: -1, noindex, nofollow, max-snippet: 2025, max-image-preview: standard"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .bot_name("google_bot".parse().unwrap())
                .max_video_preview(None)
                .no_index()
                .no_follow()
                .max_snippet(2025)
                .max_image_preview(MaxImagePreviewSetting::Standard)
                .build()
        ]))
    );

    // Two bots in one header value, each with one simple rule, yield two tags.
    test_header!(
        multiple_bots_one_rule,
        vec![b"google_bot: noindex, BadBot: nofollow"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .bot_name("google_bot".parse().unwrap())
                .no_index()
                .build(),
            RobotsTag::builder()
                .bot_name("BadBot".parse().unwrap())
                .no_follow()
                .build()
        ]))
    );

    // Two bots, each with one `key: value` rule (including an RFC 3339 date).
    test_header!(
        multiple_bots_one_composite_rule,
        vec![b"google_bot: unavailable_after: 2025-02-18T08:25:15Z, BadBot: max-image-preview: large"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .bot_name("google_bot".parse().unwrap())
                .unavailable_after(
                    DateTime::parse_from_rfc3339("2025-02-18T08:25:15Z")
                        .unwrap()
                        .with_timezone(&Utc)
                )
                .build(),
            RobotsTag::builder()
                .bot_name("BadBot".parse().unwrap())
                .max_image_preview(MaxImagePreviewSetting::Large)
                .build()
        ]))
    );

    // Two bots, each with several simple rules.
    test_header!(
        multiple_bots_multiple_rules,
        vec![b"google_bot: none, indexifembedded, BadBot: nofollow, noai, spc"],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .bot_name("google_bot".parse().unwrap())
                .none()
                .index_if_embedded()
                .build(),
            RobotsTag::builder()
                .bot_name("BadBot".parse().unwrap())
                .no_follow()
                .no_ai()
                .spc()
                .build()
        ]))
    );

    // Two bots, each with a mix of simple and `key: value` rules. The trailing `\` joins the
    // two source lines, so there is no whitespace between `none,` and `BadBot` in the input.
    test_header!(
        multiple_bots_multiple_rules_with_composite,
        vec![
            b"google_bot: max-snippet: 8, notranslate, max-image-preview: none,\
            BadBot: max-video-preview: 2025, noimageindex, max-snippet: 0"
        ],
        Some(XRobotsTag(vec![
            RobotsTag::builder()
                .bot_name("google_bot".parse().unwrap())
                .max_snippet(8)
                .no_translate()
                .max_image_preview(MaxImagePreviewSetting::None)
                .build(),
            RobotsTag::builder()
                .bot_name("BadBot".parse().unwrap())
                .max_video_preview(Some(2025))
                .no_image_index()
                .max_snippet(0)
                .build()
        ]))
    );
}
20 changes: 1 addition & 19 deletions rama-http/src/headers/x_robots_tag_components/robots_tag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl RobotsTag {
getter!(index_if_embedded, bool);
getter!(max_snippet, u32);
getter!(max_image_preview, MaxImagePreviewSetting, optional);
getter!(max_video_preview, u32, optional);
getter!(max_video_preview, Option<u32>);
getter!(no_translate, bool);
getter!(no_image_index, bool);
getter!(no_ai, bool);
Expand All @@ -98,24 +98,6 @@ impl RobotsTag {
/// Returns a borrowed view of the `unavailable_after` date, or `None` when the field holds no value.
pub fn unavailable_after(&self) -> Option<&DateTime<Utc>> {
// `as_deref` borrows the stored value and dereferences it to the `DateTime<Utc>` it wraps.
self.unavailable_after.as_deref()
}

/// Reports whether `field_name` is one of the recognised robots rule names.
///
/// Surrounding whitespace is ignored and the comparison is ASCII case-insensitive, so
/// `" NoIndex "` is accepted just like `"noindex"`.
///
/// Previously only the comparison against `"all"` was made on the trimmed input, which
/// meant `" all "` was accepted while `" noindex "` was rejected. The input is now trimmed
/// once, up front, so every name is treated the same way.
pub(super) fn is_valid_field_name(field_name: &str) -> bool {
    /// Every rule name accepted by this check.
    const KNOWN_FIELD_NAMES: &[&str] = &[
        "all",
        "noindex",
        "nofollow",
        "none",
        "nosnippet",
        "indexifembedded",
        "max-snippet",
        "max-image-preview",
        "max-video-preview",
        "notranslate",
        "noimageindex",
        "unavailable_after",
        "noai",
        "noimageai",
        "spc",
    ];

    let field_name = field_name.trim();
    KNOWN_FIELD_NAMES
        .iter()
        .any(|known| field_name.eq_ignore_ascii_case(known))
}
}

impl Display for RobotsTag {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,16 @@ impl Builder<NoTag> {
no_tag_builder_field!(index_if_embedded, bool);
no_tag_builder_field!(max_snippet, u32);
no_tag_builder_field!(max_image_preview, MaxImagePreviewSetting);
no_tag_builder_field!(max_video_preview, u32);
no_tag_builder_field!(max_video_preview, Option<u32>);
no_tag_builder_field!(no_translate, bool);
no_tag_builder_field!(no_image_index, bool);
no_tag_builder_field!(unavailable_after, DateTime<Utc>);
no_tag_builder_field!(no_ai, bool);
no_tag_builder_field!(no_image_ai, bool);
no_tag_builder_field!(spc, bool);

/// Transforms the `Builder<NoTag>` into a `Builder<RobotsTag>` by calling the
/// [`Builder<RobotsTag>::add_field()`] function (see that function for more detailed documentation)
pub fn add_field(self, s: &str) -> Result<Builder<RobotsTag>, OpaqueError> {
let mut builder = Builder(RobotsTag::new_with_bot_name(self.0.bot_name));
builder.add_field(s)?;
Expand Down Expand Up @@ -174,14 +176,14 @@ impl Builder<RobotsTag> {
robots_tag_builder_field!(index_if_embedded, bool);
robots_tag_builder_field!(max_snippet, u32);
robots_tag_builder_field!(max_image_preview, MaxImagePreviewSetting, optional);
robots_tag_builder_field!(max_video_preview, u32, optional);
robots_tag_builder_field!(max_video_preview, Option<u32>);
robots_tag_builder_field!(no_translate, bool);
robots_tag_builder_field!(no_image_index, bool);
robots_tag_builder_field!(no_ai, bool);
robots_tag_builder_field!(no_image_ai, bool);
robots_tag_builder_field!(spc, bool);

/// Adds a field based on its `&str` representation
/// Adds a field based on its `&str` representation (also handles whitespace by trimming)
///
/// # Returns and Errors
///
Expand Down Expand Up @@ -219,17 +221,23 @@ impl Builder<RobotsTag> {
} else if key.eq_ignore_ascii_case("max-image-preview") {
self.set_max_image_preview(value.parse()?)
} else if key.eq_ignore_ascii_case("max-video-preview") {
self.set_max_video_preview(value.parse().map_err(OpaqueError::from_std)?)
} else if key.eq_ignore_ascii_case("unavailable_after: <date/time>") {
self.set_max_video_preview(match value {
"-1" => None,
_ => Some(value.parse().map_err(OpaqueError::from_std)?),
})
} else if key.eq_ignore_ascii_case("unavailable_after") {
self.set_unavailable_after(value.parse::<ValidDate>()?.into())
} else {
return Err(OpaqueError::from_std(Error::invalid()));
})
} else {
self.add_simple_field(s)
self.add_simple_field(s.trim())
}
}

/// # Contracts
///
/// - expects `s` to be trimmed in advance
fn add_simple_field(&mut self, s: &str) -> Result<&mut Self, OpaqueError> {
Ok(if s.eq_ignore_ascii_case("all") {
self.set_all()
Expand Down
Loading

0 comments on commit 0c96902

Please sign in to comment.