Fix video backward seeking / stepping back sometimes getting stuck (in the presence of b-frames) #8053

Merged
merged 3 commits on Nov 11, 2024
1 change: 1 addition & 0 deletions Cargo.lock
@@ -6477,6 +6477,7 @@ dependencies = [
name = "re_video"
version = "0.20.0-alpha.1+dev"
dependencies = [
"bit-vec",
"cfg_aliases 0.2.1",
"criterion",
"crossbeam",
1 change: 1 addition & 0 deletions crates/store/re_video/Cargo.toml
@@ -47,6 +47,7 @@ re_build_info.workspace = true
re_log.workspace = true
re_tracing.workspace = true

bit-vec.workspace = true
crossbeam.workspace = true
econtext.workspace = true
itertools.workspace = true
6 changes: 0 additions & 6 deletions crates/store/re_video/src/decode/ffmpeg_h264/ffmpeg.rs
@@ -516,12 +516,6 @@ fn read_ffmpeg_output(
// How do we know how large this buffer needs to be?
// Whenever the highest known DTS is behind the PTS, we need to wait until the DTS catches up.
// Otherwise, we'd assign the wrong PTS to the frame that just came in.
//
// Example how presentation timestamps and decode timestamps
// can play out in the presence of B-frames to illustrate this:
// PTS: 1 4 2 3
// DTS: 1 2 3 4
// Stream: I P B B
let frame_info = loop {
let oldest_pts_in_buffer =
pending_frame_infos.first_key_value().map(|(pts, _)| *pts);
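The comment removed above illustrated how PTS and DTS interleave with B-frames; the surrounding logic still relies on the rule it described: a frame info may only be matched once the highest known DTS has caught up with its PTS. Below is a minimal sketch of that rule, assuming a map keyed by PTS as in `pending_frame_infos` above; the function and parameter names are illustrative, not the decoder's actual state.

```rust
use std::collections::BTreeMap;

/// Sketch of the rule above (illustrative, not the actual decoder code): release the
/// frame info with the oldest PTS only once the highest DTS submitted so far has
/// caught up to it. Before that, a later-submitted sample could still carry an even
/// older PTS (B-frame reordering), and the frame would be assigned the wrong PTS.
fn next_ready_frame<FrameInfo>(
    pending_frame_infos: &mut BTreeMap<i64, FrameInfo>, // keyed by PTS, as in the code above
    highest_dts_so_far: i64,
) -> Option<FrameInfo> {
    let (&oldest_pts, _) = pending_frame_infos.first_key_value()?;
    if oldest_pts <= highest_dts_so_far {
        pending_frame_infos.remove(&oldest_pts)
    } else {
        None // DTS hasn't caught up yet - keep buffering
    }
}

fn main() {
    // PTS/DTS pattern from the removed comment: Stream I P B B, PTS 1 4 2 3, DTS 1 2 3 4.
    // After the first three frames, PTS 1 has already been released, PTS 4 and 2 are pending,
    // and the highest DTS seen so far is 3.
    let mut pending = BTreeMap::from([(4, "P"), (2, "B1")]);
    assert_eq!(next_ready_frame(&mut pending, 3), Some("B1")); // oldest PTS (2) <= highest DTS (3)
    assert_eq!(next_ready_frame(&mut pending, 3), None); // PTS 4 must wait until DTS reaches 4
}
```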
201 changes: 185 additions & 16 deletions crates/store/re_video/src/demux/mod.rs
@@ -9,6 +9,7 @@ pub mod mp4;

use std::{collections::BTreeMap, ops::Range};

use bit_vec::BitVec;
use itertools::Itertools as _;

use super::{Time, Timescale};
@@ -89,6 +90,11 @@ pub struct SamplesStatistics {
///
/// If true, the video typically has no B-frames as those require frame reordering.
pub dts_always_equal_pts: bool,

/// If `dts_always_equal_pts` is false, then this gives for each sample whether its PTS is the highest seen so far.
/// If `dts_always_equal_pts` is true, then this is left as `None`.
/// This is used for optimizing PTS search.
pub has_sample_highest_pts_so_far: Option<BitVec>,
}

impl SamplesStatistics {
@@ -104,9 +110,25 @@ impl SamplesStatistics {
.iter()
.all(|s| s.decode_timestamp == s.presentation_timestamp);

let mut biggest_pts_so_far = Time::MIN;
let has_sample_highest_pts_so_far = (!dts_always_equal_pts).then(|| {
samples
.iter()
.map(move |sample| {
if sample.presentation_timestamp > biggest_pts_so_far {
biggest_pts_so_far = sample.presentation_timestamp;
true
} else {
false
}
})
.collect()
});

Self {
minimum_presentation_timestamp,
dts_always_equal_pts,
has_sample_highest_pts_so_far,
}
}
}
@@ -296,34 +318,83 @@ impl VideoData {
})
}

/// For a given decode (!) timestamp, returns the index of the latest sample whose
/// For a given decode (!) timestamp, returns the index of the first sample whose
/// decode timestamp is lesser than or equal to the given timestamp.
pub fn latest_sample_index_at_decode_timestamp(&self, decode_time: Time) -> Option<usize> {
latest_at_idx(
&self.samples,
|sample| sample.decode_timestamp,
&decode_time,
)
fn latest_sample_index_at_decode_timestamp(
samples: &[Sample],
decode_time: Time,
) -> Option<usize> {
latest_at_idx(samples, |sample| sample.decode_timestamp, &decode_time)
}

/// For a given presentation timestamp, return the index of the latest sample
/// whose presentation timestamp is lesser than or equal to the given timestamp.
pub fn latest_sample_index_at_presentation_timestamp(
&self,
/// See [`Self::latest_sample_index_at_presentation_timestamp`], split out for testing purposes.
fn latest_sample_index_at_presentation_timestamp_internal(
samples: &[Sample],
sample_statistics: &SamplesStatistics,
presentation_timestamp: Time,
) -> Option<usize> {
// Find the latest sample where `decode_timestamp <= presentation_timestamp`.
// Because `decode <= presentation`, we never have to look further backwards in the
// video than this.
let decode_sample_idx =
self.latest_sample_index_at_decode_timestamp(presentation_timestamp)?;
Self::latest_sample_index_at_decode_timestamp(samples, presentation_timestamp)?;

// It's very common that dts==pts in which case we're done!
let Some(has_sample_highest_pts_so_far) =
sample_statistics.has_sample_highest_pts_so_far.as_ref()
else {
debug_assert!(sample_statistics.dts_always_equal_pts);
return Some(decode_sample_idx);
};
debug_assert!(has_sample_highest_pts_so_far.len() == samples.len());

// Search backwards, starting at `decode_sample_idx`, looking for
// the first sample where `sample.presentation_timestamp <= presentation_timestamp`.
// This is the sample which, when decoded, will be presented at the timestamp the user requested.
self.samples[..=decode_sample_idx]
.iter()
.rposition(|sample| sample.presentation_timestamp <= presentation_timestamp)
// I.e. the sample with the biggest PTS that is smaller or equal to the requested PTS.
//
// The tricky part is that we can't just stop at the first sample whose presentation timestamp is small enough:
// due to reordering, a sample even further back may have a *larger* PTS that is still <= the requested one!
let mut best_index = usize::MAX;
let mut best_pts = Time::MIN;
for sample_idx in (0..=decode_sample_idx).rev() {
let sample = &samples[sample_idx];

if sample.presentation_timestamp == presentation_timestamp {
// Clean hit. Take this one, no questions asked :)
// (assuming that each PTS is unique!)
return Some(sample_idx);
}

if sample.presentation_timestamp < presentation_timestamp
&& sample.presentation_timestamp > best_pts
{
best_pts = sample.presentation_timestamp;
best_index = sample_idx;
}

if best_pts != Time::MIN && has_sample_highest_pts_so_far[sample_idx] {
// We won't see any bigger PTS values anymore, meaning we're as close as we can get to the requested PTS!
return Some(best_index);
}
}

None
}

/// For a given presentation timestamp, return the index of the sample with the highest
/// presentation timestamp that is less than or equal to the given timestamp.
///
/// Remember that samples after (i.e. with higher index) may have a *lower* presentation time
/// if the stream has sample reordering!
pub fn latest_sample_index_at_presentation_timestamp(
&self,
presentation_timestamp: Time,
) -> Option<usize> {
Self::latest_sample_index_at_presentation_timestamp_internal(
&self.samples,
&self.samples_statistics,
presentation_timestamp,
)
}

/// For a given decode (!) timestamp, return the index of the group of pictures (GOP) index containing the given timestamp.
@@ -553,4 +624,102 @@ mod tests {
assert_eq!(latest_at_idx(&v, |v| *v, &11), Some(9));
assert_eq!(latest_at_idx(&v, |v| *v, &1000), Some(9));
}

#[test]
fn test_latest_sample_index_at_presentation_timestamp() {
// This is a snippet of real world data!
let pts = [
512, 1536, 1024, 768, 1280, 2560, 2048, 1792, 2304, 3584, 3072, 2816, 3328, 4608, 4096,
3840, 4352, 5376, 4864, 5120, 6400, 5888, 5632, 6144, 7424, 6912, 6656, 7168, 8448,
7936, 7680, 8192, 9472, 8960, 8704, 9216, 10496, 9984, 9728, 10240, 11520, 11008,
10752, 11264, 12544, 12032, 11776, 12288, 13568, 13056,
];
let dts = [
0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584,
3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, 6912, 7168,
7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984, 10240, 10496, 10752,
11008, 11264, 11520, 11776, 12032, 12288, 12544,
];

// Checking our basic assumptions about this data:
assert_eq!(pts.len(), dts.len());
assert!(pts.iter().zip(dts.iter()).all(|(pts, dts)| dts <= pts));

// Create fake samples from this.
let samples = pts
.into_iter()
.zip(dts)
.map(|(pts, dts)| Sample {
is_sync: false,
decode_timestamp: Time(dts),
presentation_timestamp: Time(pts),
duration: Time(1),
byte_offset: 0,
byte_length: 0,
})
.collect::<Vec<_>>();

let sample_statistics = SamplesStatistics::new(&samples);
assert_eq!(sample_statistics.minimum_presentation_timestamp, Time(512));
assert!(!sample_statistics.dts_always_equal_pts);

// Test queries on the samples.
let query_pts = |pts| {
VideoData::latest_sample_index_at_presentation_timestamp_internal(
&samples,
&sample_statistics,
pts,
)
};

// Check that querying each sample's exact presentation timestamp returns that sample's index.
for (idx, sample) in samples.iter().enumerate() {
assert_eq!(Some(idx), query_pts(sample.presentation_timestamp));
}

// Check that the query is still correct for positions slightly offset from each sample's PTS.
// This works because for this dataset the distance between consecutive presentation timestamps is always 256.
for (idx, sample) in samples.iter().enumerate() {
assert_eq!(
Some(idx),
query_pts(sample.presentation_timestamp + Time(1))
);
assert_eq!(
Some(idx),
query_pts(sample.presentation_timestamp + Time(255))
);
}

// A few hardcoded cases - both for illustrative purposes and to make sure the generic tests above are correct.

// Querying before the first sample.
assert_eq!(None, query_pts(Time(0)));
assert_eq!(None, query_pts(Time(123)));

// Querying for the first sample
assert_eq!(Some(0), query_pts(Time(512)));
assert_eq!(Some(0), query_pts(Time(513)));
assert_eq!(Some(0), query_pts(Time(600)));
assert_eq!(Some(0), query_pts(Time(767)));

// The next sample is a jump in index!
assert_eq!(Some(3), query_pts(Time(768)));
assert_eq!(Some(3), query_pts(Time(769)));
assert_eq!(Some(3), query_pts(Time(800)));
assert_eq!(Some(3), query_pts(Time(1023)));

// And the one after that should jump back again.
assert_eq!(Some(2), query_pts(Time(1024)));
assert_eq!(Some(2), query_pts(Time(1025)));
assert_eq!(Some(2), query_pts(Time(1100)));
assert_eq!(Some(2), query_pts(Time(1279)));

// And another one!
assert_eq!(Some(4), query_pts(Time(1280)));
assert_eq!(Some(4), query_pts(Time(1281)));

// Test way outside of the range.
// (this is not the last element in the list since that one doesn't have the highest PTS)
assert_eq!(Some(48), query_pts(Time(123123123123123123)));
}
}
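For illustration, here is a standalone sketch of how the `has_sample_highest_pts_so_far` information speeds up the backwards search above, written against plain timestamp slices rather than `Sample`s. A `Vec<bool>` stands in for the bit-vec and a linear `rposition` for the binary search in `latest_at_idx`, so this is a simplified model of the code above, not the crate's API.

```rust
/// Standalone sketch of the backward PTS search on plain timestamp slices.
fn latest_index_at_pts(pts: &[i64], dts: &[i64], query: i64) -> Option<usize> {
    // Precompute, per sample, whether its PTS is the highest seen so far
    // (what the bit-vec in `SamplesStatistics` stores).
    let mut highest = i64::MIN;
    let highest_pts_so_far: Vec<bool> = pts
        .iter()
        .map(|&p| {
            let is_highest = p > highest;
            highest = highest.max(p);
            is_highest
        })
        .collect();

    // Since DTS <= PTS, we never need to look past the last sample with DTS <= query.
    let start = dts.iter().rposition(|&d| d <= query)?;

    // Walk backwards looking for the sample with the largest PTS that is <= query.
    let mut best: Option<usize> = None;
    let mut best_pts = i64::MIN;
    for i in (0..=start).rev() {
        if pts[i] == query {
            return Some(i); // exact hit
        }
        if pts[i] < query && pts[i] > best_pts {
            best = Some(i);
            best_pts = pts[i];
        }
        if best.is_some() && highest_pts_so_far[i] {
            // No earlier sample can have a larger PTS - we can stop early.
            return best;
        }
    }
    best
}

fn main() {
    // Prefix of the PTS/DTS data used in the test above (B-frame reordering present).
    let pts = [512, 1536, 1024, 768, 1280];
    let dts = [0, 256, 512, 768, 1024];
    assert_eq!(latest_index_at_pts(&pts, &dts, 100), None); // before the first frame
    assert_eq!(latest_index_at_pts(&pts, &dts, 768), Some(3)); // exact hit
    assert_eq!(latest_index_at_pts(&pts, &dts, 1100), Some(2)); // PTS 1024 beats PTS 768
}
```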
18 changes: 18 additions & 0 deletions crates/viewer/re_data_ui/src/video.rs
@@ -268,6 +268,24 @@ fn frame_info_ui(ui: &mut egui::Ui, frame_info: &FrameInfo, video_data: &re_vide
.on_hover_text("Raw presentation timestamp prior to applying the timescale.\n\
This specifies the time at which the frame should be shown relative to the start of a video stream.");

// Judging the following to be a bit too obscure to be of relevance outside of debugging Rerun itself.
#[cfg(debug_assertions)]
{
if let Some(has_sample_highest_pts_so_far) = video_data
.samples_statistics
.has_sample_highest_pts_so_far
.as_ref()
{
if let Some(sample_idx) = video_data
.latest_sample_index_at_presentation_timestamp(frame_info.presentation_timestamp)
{
ui.list_item_flat_noninteractive(
PropertyContent::new("Highest PTS so far").value_text(has_sample_highest_pts_so_far[sample_idx].to_string())
).on_hover_text("Whether the presentation timestamp (PTS) at this frame is the highest encountered so far. If false, an earlier sample in the list has a higher PTS.");
}
}
}

// Information about the current group of pictures this frame is part of.
// Lookup via decode timestamp is faster, but it may not always be available.
if let Some(gop_index) =
10 changes: 9 additions & 1 deletion crates/viewer/re_renderer/src/video/player.rs
@@ -258,7 +258,15 @@ impl VideoPlayer {
// special case: handle seeking backwards within a single GOP
// this is super inefficient, but it's the only way to handle it
// while maintaining a buffer of only 2 GOPs
if requested_sample_idx < self.current_sample_idx {
//
// Note that due to sample reordering (in the presence of B-frames), it can happen
// that `self.current_sample_idx` is *behind* the `requested_sample_idx` even if we're
// seeking backwards!
// Therefore, it's important to compare presentation timestamps instead of sample indices.
// (comparing decode timestamps should be equivalent to comparing sample indices)
let current_pts = self.data.samples[self.current_sample_idx].presentation_timestamp;
let requested_pts = self.data.samples[requested_sample_idx].presentation_timestamp;
if requested_pts < current_pts {
self.reset()?;
self.enqueue_gop(requested_gop_idx, video_data)?;
self.enqueue_gop(requested_gop_idx + 1, video_data)?;
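A small self-contained sketch of why this check has to compare presentation timestamps rather than sample indices, using the same reordered PTS data as the test above; `needs_reset` and `pts_of` are illustrative names, not the player's actual fields.

```rust
/// Sketch of the backward-seek check: with B-frame reordering, a seek that goes
/// backwards in presentation time may go *forwards* in sample index, so the
/// decision must be based on PTS, not on sample indices.
fn needs_reset(pts_of: &[i64], current_sample_idx: usize, requested_sample_idx: usize) -> bool {
    pts_of[requested_sample_idx] < pts_of[current_sample_idx]
}

fn main() {
    // Decode-order PTS with B-frames (prefix of the test data above).
    let pts = [512, 1536, 1024, 768, 1280];
    // Stepping back from the frame shown at PTS 1024 (index 2) to PTS 768 lands
    // on index 3: the sample index *increases* even though we seek back in time.
    assert!(needs_reset(&pts, 2, 3));
    // An index-based check (`requested < current`, i.e. `3 < 2`) would miss this
    // backward seek - the stuck-seek symptom this PR fixes.
}
```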