diff --git a/crates/red_knot_test/src/lib.rs b/crates/red_knot_test/src/lib.rs
index 691a71528e1d7b..ae524e0f042950 100644
--- a/crates/red_knot_test/src/lib.rs
+++ b/crates/red_knot_test/src/lib.rs
@@ -5,10 +5,9 @@ use ruff_db::files::system_path_to_file;
 use ruff_db::parsed::parsed_module;
 use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
 use ruff_source_file::OneIndexed;
-use std::collections::BTreeMap;
 use std::path::PathBuf;
 
-type Failures = BTreeMap<AbsoluteLineNumberPath, matcher::FailuresByLine>;
+type Failures = Vec<(AbsoluteLineNumberPath, matcher::FailuresByLine)>;
 
 mod assertion;
 mod db;
@@ -35,24 +34,16 @@ pub fn run(path: &PathBuf, title: &str) {
             any_failures = true;
             println!("\n{}\n", test.name().bold().underline());
 
-            for (
-                AbsoluteLineNumberPath {
-                    path,
-                    line_number: absolute_line_number,
-                },
-                by_line,
-            ) in failures
-            {
-                println!("{}", path.as_str().bold());
+            for (contextual_path, by_line) in failures {
+                println!("{}", contextual_path.path.as_str().bold());
                 for (line_number, failures) in by_line.iter() {
                     for failure in failures {
-                        let absolute_line_info = format!(
-                            "{}:{}",
-                            title,
-                            absolute_line_number.saturating_add(line_number.get())
-                        );
-                        let line_info = format!("line {line_number}:").cyan();
-                        println!("{absolute_line_info} {line_info} {failure}");
+                        let absolute_line_number = contextual_path
+                            .starting_line_number
+                            .saturating_add(line_number.try_into().unwrap());
+                        let line_info = format!("{title}:{absolute_line_number}").cyan();
+
+                        println!(" {line_info} {failure}");
                     }
                 }
                 println!();
@@ -65,10 +56,11 @@ pub fn run(path: &PathBuf, title: &str) {
     assert!(!any_failures, "Some tests failed.");
 }
 
-#[derive(PartialEq, PartialOrd, Eq, Ord)]
 struct AbsoluteLineNumberPath {
     path: SystemPathBuf,
-    line_number: OneIndexed,
+
+    // Line number of the ``` that starts the code block
+    starting_line_number: OneIndexed,
 }
 
 fn run_test(test: &parser::MarkdownTest) -> Result<(), Failures> {
@@ -86,15 +78,14 @@
         db.write_file(&full_path, file.code).unwrap();
         paths.push(AbsoluteLineNumberPath {
             path: full_path,
-            line_number: file.md_line_number,
+            starting_line_number: file.starting_line_number,
         });
     }
 
-    let mut failures = BTreeMap::default();
+    let mut failures = vec![];
 
-    for numbered_path in paths {
-        let path = numbered_path.path.clone();
-        let file = system_path_to_file(&db, path.clone()).unwrap();
+    for contextual_path in paths {
+        let file = system_path_to_file(&db, contextual_path.path.clone()).unwrap();
         let parsed = parsed_module(&db, file);
 
         // TODO allow testing against code with syntax errors
@@ -102,17 +93,18 @@
             parsed.errors().is_empty(),
             "Python syntax errors in {}, {:?}: {:?}",
             test.name(),
-            path,
+            contextual_path.path,
            parsed.errors()
         );
         matcher::match_file(&db, file, check_types(&db, file)).unwrap_or_else(|line_failures| {
-            failures.insert(numbered_path, line_failures);
+            failures.push((contextual_path, line_failures));
         });
     }
 
     if failures.is_empty() {
         Ok(())
     } else {
+        failures.sort_by(|(a, _), (b, _)| a.path.cmp(&b.path));
         Err(failures)
     }
 }
diff --git a/crates/red_knot_test/src/parser.rs b/crates/red_knot_test/src/parser.rs
index 122dcc97f588fa..28dcb2b4405e09 100644
--- a/crates/red_knot_test/src/parser.rs
+++ b/crates/red_knot_test/src/parser.rs
@@ -1,7 +1,8 @@
 use once_cell::sync::Lazy;
 use regex::{Captures, Regex};
 use ruff_index::{newtype_index, IndexVec};
-use ruff_source_file::OneIndexed;
+use ruff_source_file::{LineIndex, OneIndexed};
+use ruff_text_size::TextSize;
 use rustc_hash::{FxHashMap, FxHashSet};
 
 /// Parse the Markdown `source` as a test suite with given `title`.
@@ -132,7 +133,9 @@ pub(crate) struct EmbeddedFile<'s> {
     pub(crate) path: &'s str,
     pub(crate) lang: &'s str,
     pub(crate) code: &'s str,
-    pub(crate) md_line_number: OneIndexed,
+
+    /// The line number of the ``` in the markdown file
+    pub(crate) starting_line_number: OneIndexed,
 }
 
 /// Matches an arbitrary amount of whitespace (including newlines), followed by a sequence of `#`
@@ -188,8 +191,10 @@ struct Parser<'s> {
     /// The unparsed remainder of the Markdown source.
     unparsed: &'s str,
 
-    /// Current line number of the parser
-    current_line_number: OneIndexed,
+    /// Current offset of the parser into the markdown file.
+    md_offset: TextSize,
+
+    line_index: LineIndex,
 
     /// Stack of ancestor sections.
     stack: SectionStack,
@@ -210,7 +215,8 @@ impl<'s> Parser<'s> {
             sections,
             files: IndexVec::default(),
             unparsed: source,
-            current_line_number: OneIndexed::new(1).unwrap(),
+            md_offset: TextSize::new(0),
+            line_index: LineIndex::from_source_text(&source),
             stack: SectionStack::new(root_section_id),
             current_section_files: None,
         }
@@ -231,10 +237,13 @@ impl<'s> Parser<'s> {
         }
     }
 
-    fn increment_line_count(&mut self, captures: &Captures<'s>) {
-        self.current_line_number = self
-            .current_line_number
-            .saturating_add(captures[0].lines().count());
+    fn increment_offset(&mut self, size: usize) -> anyhow::Result<()> {
+        self.md_offset = self
+            .md_offset
+            .checked_add(size.try_into()?)
+            .ok_or_else(|| anyhow::anyhow!("Overflow when incrementing offset by {size}"))?;
+
+        Ok(())
     }
 
     fn parse_impl(&mut self) -> anyhow::Result<()> {
@@ -247,7 +256,7 @@
                 // ignore other Markdown syntax (paragraphs, etc) used as comments in the test
                 if let Some(next_newline) = self.unparsed.find('\n') {
                     (_, self.unparsed) = self.unparsed.split_at(next_newline + 1);
-                    self.current_line_number = self.current_line_number.saturating_add(1);
+                    self.increment_offset(next_newline + 1)?;
                 } else {
                     break;
                 }
@@ -283,7 +292,12 @@ impl<'s> Parser<'s> {
 
         self.current_section_files = None;
 
-        self.increment_line_count(captures);
+        self.increment_offset(
+            captures
+                .get(0)
+                .ok_or_else(|| anyhow::anyhow!("No captures found"))?
+                .len(),
+        )?;
 
         Ok(())
     }
@@ -318,7 +332,8 @@ impl<'s> Parser<'s> {
             // CODE_RE can't match without matches for 'lang' and 'code'.
             lang: captures.name("lang").unwrap().into(),
             code: captures.name("code").unwrap().into(),
-            md_line_number: self.current_line_number,
+
+            starting_line_number: self.line_index.line_index(self.md_offset),
         });
 
         if let Some(current_files) = &mut self.current_section_files {
@@ -340,7 +355,12 @@ impl<'s> Parser<'s> {
             self.current_section_files = Some(FxHashSet::from_iter([path]));
         }
 
-        self.increment_line_count(captures);
+        self.increment_offset(
+            captures
+                .get(0)
+                .ok_or_else(|| anyhow::anyhow!("No captures found"))?
+                .len(),
+        )?;
 
         Ok(())
     }
diff --git a/crates/ruff_source_file/src/line_index.rs b/crates/ruff_source_file/src/line_index.rs
index a66d6b2f99e345..c5de8c2ceb3e43 100644
--- a/crates/ruff_source_file/src/line_index.rs
+++ b/crates/ruff_source_file/src/line_index.rs
@@ -1,6 +1,6 @@
 use std::fmt;
 use std::fmt::{Debug, Formatter};
-use std::num::{NonZeroUsize, ParseIntError};
+use std::num::{IntErrorKind, NonZeroUsize, ParseIntError, TryFromIntError};
 use std::ops::Deref;
 use std::str::FromStr;
 use std::sync::Arc;
@@ -418,6 +418,22 @@ impl FromStr for OneIndexed {
     }
 }
 
+impl TryFrom<usize> for OneIndexed {
+    type Error = TryFromIntError;
+
+    fn try_from(value: usize) -> Result<Self, Self::Error> {
+        Ok(OneIndexed(NonZeroUsize::try_from(value)?))
+    }
+}
+
+impl TryInto<usize> for OneIndexed {
+    type Error = IntErrorKind;
+
+    fn try_into(self) -> Result<usize, Self::Error> {
+        Ok(self.0.get())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use ruff_text_size::TextSize;
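
As a usage sketch of the new `OneIndexed` conversions added in `line_index.rs` (illustrative only, not part of the patch): `OneIndexed` wraps a `NonZeroUsize`, so `TryFrom<usize>` fails for zero and succeeds for any other value, while `TryInto<usize>` simply returns the wrapped value.

```rust
use ruff_source_file::OneIndexed;

fn main() {
    // Zero has no one-indexed equivalent, so the conversion fails.
    assert!(OneIndexed::try_from(0usize).is_err());

    // Any non-zero usize converts, and converts back losslessly.
    let line: OneIndexed = 3usize.try_into().unwrap();
    let raw: usize = line.try_into().unwrap();
    assert_eq!(raw, 3);
}
```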