Skip to content

Commit c81fe6a

Browse files
authored
Improved CLI 3: stdin streaming support (#7092)
You can now do this:
```
cat docs/snippets/all/archetypes/*_rust.rrd | rerun rrd print
```
and this:
```
cat docs/snippets/all/archetypes/*_rust.rrd | rerun rrd merge -o /tmp/all_merged.rrd
```
and this:
```
cat docs/snippets/all/archetypes/*_rust.rrd | rerun rrd compact --max-rows 99999999 --max-bytes 999999999 -o /tmp/all_compacted_max.rrd
```

- Part of #7048
- DNM: requires #7091
1 parent 84f63a0 commit c81fe6a

File tree

7 files changed

+266
-102
lines changed

7 files changed

+266
-102
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -5543,6 +5543,7 @@ dependencies = [
55435543
"re_crash_handler",
55445544
"re_data_source",
55455545
"re_entity_db",
5546+
"re_error",
55465547
"re_format",
55475548
"re_log",
55485549
"re_log_encoding",

crates/top/rerun/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ re_build_info.workspace = true
109109
re_chunk.workspace = true
110110
re_crash_handler.workspace = true
111111
re_entity_db.workspace = true
112+
re_error.workspace = true
112113
re_format.workspace = true
113114
re_log_types.workspace = true
114115
re_log.workspace = true

crates/top/rerun/src/commands/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ impl CallSource {
2323

2424
mod entrypoint;
2525
mod rrd;
26+
mod stdio;
2627

2728
#[cfg(feature = "analytics")]
2829
mod analytics;
2930

3031
pub use self::entrypoint::run;
3132
pub use self::rrd::RrdCommands;
33+
pub use self::stdio::read_rrd_streams_from_file_or_stdin;
3234

3335
#[cfg(feature = "analytics")]
3436
pub(crate) use self::analytics::AnalyticsCommands;

crates/top/rerun/src/commands/rrd/merge_compact.rs

+88-45
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,35 @@
11
use std::path::PathBuf;
22

33
use anyhow::Context as _;
4-
use itertools::Itertools as _;
54

65
use re_chunk_store::ChunkStoreConfig;
76
use re_entity_db::EntityDb;
87
use re_log_types::{LogMsg, StoreId};
98
use re_sdk::StoreKind;
109

10+
use crate::commands::read_rrd_streams_from_file_or_stdin;
11+
1112
// ---
1213

1314
#[derive(Debug, Clone, clap::Parser)]
1415
pub struct MergeCommand {
16+
/// Paths to read from. Reads from standard input if none are specified.
1517
path_to_input_rrds: Vec<String>,
1618

1719
#[arg(short = 'o', long = "output", value_name = "dst.(rrd|rbl)")]
1820
path_to_output_rrd: String,
21+
22+
/// If set, will try to proceed even in the face of IO and/or decoding errors in the input data.
23+
#[clap(long, default_value_t = false)]
24+
best_effort: bool,
1925
}
2026

2127
impl MergeCommand {
2228
pub fn run(&self) -> anyhow::Result<()> {
2329
let Self {
2430
path_to_input_rrds,
2531
path_to_output_rrd,
32+
best_effort,
2633
} = self;
2734

2835
// NOTE #1: We're doing headless processing, there's no point in running subscribers, it will just
@@ -31,14 +38,20 @@ impl MergeCommand {
3138
// (e.g. by recompacting it differently), so make sure to disable all these features.
3239
let store_config = ChunkStoreConfig::ALL_DISABLED;
3340

34-
merge_and_compact(&store_config, path_to_input_rrds, path_to_output_rrd)
41+
merge_and_compact(
42+
*best_effort,
43+
&store_config,
44+
path_to_input_rrds,
45+
path_to_output_rrd,
46+
)
3547
}
3648
}
3749

3850
// ---
3951

4052
#[derive(Debug, Clone, clap::Parser)]
4153
pub struct CompactCommand {
54+
/// Paths to read from. Reads from standard input if none are specified.
4255
path_to_input_rrds: Vec<String>,
4356

4457
#[arg(short = 'o', long = "output", value_name = "dst.(rrd|rbl)")]
@@ -63,6 +76,10 @@ pub struct CompactCommand {
6376
/// Overrides RERUN_CHUNK_MAX_ROWS_IF_UNSORTED if set.
6477
#[arg(long = "max-rows-if-unsorted")]
6578
max_rows_if_unsorted: Option<u64>,
79+
80+
/// If set, will try to proceed even in the face of IO and/or decoding errors in the input data.
81+
#[clap(long, default_value_t = false)]
82+
best_effort: bool,
6683
}
6784

6885
impl CompactCommand {
@@ -73,6 +90,7 @@ impl CompactCommand {
7390
max_bytes,
7491
max_rows,
7592
max_rows_if_unsorted,
93+
best_effort,
7694
} = self;
7795

7896
let mut store_config = ChunkStoreConfig::from_env().unwrap_or_default();
@@ -90,29 +108,38 @@ impl CompactCommand {
90108
store_config.chunk_max_rows_if_unsorted = *max_rows_if_unsorted;
91109
}
92110

93-
merge_and_compact(&store_config, path_to_input_rrds, path_to_output_rrd)
111+
merge_and_compact(
112+
*best_effort,
113+
&store_config,
114+
path_to_input_rrds,
115+
path_to_output_rrd,
116+
)
94117
}
95118
}
96119

97120
fn merge_and_compact(
121+
best_effort: bool,
98122
store_config: &ChunkStoreConfig,
99123
path_to_input_rrds: &[String],
100124
path_to_output_rrd: &str,
101125
) -> anyhow::Result<()> {
102-
let path_to_input_rrds = path_to_input_rrds.iter().map(PathBuf::from).collect_vec();
103126
let path_to_output_rrd = PathBuf::from(path_to_output_rrd);
104127

105-
let rrds_in: Result<Vec<_>, _> = path_to_input_rrds
106-
.iter()
107-
.map(|path_to_input_rrd| {
108-
std::fs::File::open(path_to_input_rrd).with_context(|| format!("{path_to_input_rrd:?}"))
128+
let rrds_in_size = {
129+
let rrds_in: Result<Vec<_>, _> = path_to_input_rrds
130+
.iter()
131+
.map(|path_to_input_rrd| {
132+
std::fs::File::open(path_to_input_rrd)
133+
.with_context(|| format!("{path_to_input_rrd:?}"))
134+
})
135+
.collect();
136+
rrds_in.ok().and_then(|rrds_in| {
137+
rrds_in
138+
.iter()
139+
.map(|rrd_in| rrd_in.metadata().ok().map(|md| md.len()))
140+
.sum::<Option<u64>>()
109141
})
110-
.collect();
111-
let rrds_in = rrds_in?;
112-
let rrds_in_size = rrds_in
113-
.iter()
114-
.map(|rrd_in| rrd_in.metadata().ok().map(|md| md.len()))
115-
.sum::<Option<u64>>();
142+
};
116143

117144
let file_size_to_string = |size: Option<u64>| {
118145
size.map_or_else(
@@ -121,42 +148,53 @@ fn merge_and_compact(
121148
)
122149
};
123150

151+
let now = std::time::Instant::now();
124152
re_log::info!(
125-
max_num_rows = %re_format::format_uint(store_config.chunk_max_rows),
126-
max_num_bytes = %re_format::format_bytes(store_config.chunk_max_bytes as _),
127-
dst = ?path_to_output_rrd,
153+
max_rows = %re_format::format_uint(store_config.chunk_max_rows),
154+
max_rows_if_unsorted = %re_format::format_uint(store_config.chunk_max_rows_if_unsorted),
155+
max_bytes = %re_format::format_bytes(store_config.chunk_max_bytes as _),
128156
srcs = ?path_to_input_rrds,
129-
src_size_bytes = %file_size_to_string(rrds_in_size),
130-
"merge started"
157+
"merge/compaction started"
131158
);
132159

133-
let now = std::time::Instant::now();
160+
// TODO(cmc): might want to make this configurable at some point.
161+
let version_policy = re_log_encoding::decoder::VersionPolicy::Warn;
162+
let rx = read_rrd_streams_from_file_or_stdin(version_policy, path_to_input_rrds);
134163

135164
let mut entity_dbs: std::collections::HashMap<StoreId, EntityDb> = Default::default();
136-
let mut version = None;
137-
for rrd_in in rrds_in {
138-
let version_policy = re_log_encoding::decoder::VersionPolicy::Warn;
139-
let decoder = re_log_encoding::decoder::Decoder::new(version_policy, rrd_in)?;
140-
version = version.max(Some(decoder.version()));
141-
for msg in decoder {
142-
let msg = msg.context("decode rrd message")?;
143-
entity_dbs
144-
.entry(msg.store_id().clone())
145-
.or_insert_with(|| {
146-
re_entity_db::EntityDb::with_store_config(
147-
msg.store_id().clone(),
148-
store_config.clone(),
149-
)
150-
})
151-
.add(&msg)
152-
.context("decode rrd file contents")?;
165+
166+
for res in rx {
167+
let mut is_success = true;
168+
169+
match res {
170+
Ok(msg) => {
171+
if let Err(err) = entity_dbs
172+
.entry(msg.store_id().clone())
173+
.or_insert_with(|| {
174+
re_entity_db::EntityDb::with_store_config(
175+
msg.store_id().clone(),
176+
store_config.clone(),
177+
)
178+
})
179+
.add(&msg)
180+
{
181+
re_log::error!(%err, "couldn't index corrupt chunk");
182+
is_success = false;
183+
}
184+
}
185+
186+
Err(err) => {
187+
re_log::error!(err = re_error::format(err));
188+
is_success = false;
189+
}
153190
}
154-
}
155191

156-
anyhow::ensure!(
157-
!entity_dbs.is_empty(),
158-
"no recordings found in rrd/rbl file"
159-
);
192+
if !best_effort && !is_success {
193+
anyhow::bail!(
194+
"one or more IO and/or decoding failures in the input stream (check logs)"
195+
)
196+
}
197+
}
160198

161199
let mut rrd_out = std::fs::File::create(&path_to_output_rrd)
162200
.with_context(|| format!("{path_to_output_rrd:?}"))?;
@@ -178,7 +216,12 @@ fn merge_and_compact(
178216
let messages_rrd = messages_rrd.iter().flatten();
179217

180218
let encoding_options = re_log_encoding::EncodingOptions::COMPRESSED;
181-
let version = version.unwrap_or(re_build_info::CrateVersion::LOCAL);
219+
let version = entity_dbs
220+
.values()
221+
.next()
222+
.and_then(|db| db.store_info())
223+
.and_then(|info| info.store_version)
224+
.unwrap_or(re_build_info::CrateVersion::LOCAL);
182225
re_log_encoding::encoder::encode(
183226
version,
184227
encoding_options,
@@ -187,7 +230,7 @@ fn merge_and_compact(
187230
messages_rbl.chain(messages_rrd),
188231
&mut rrd_out,
189232
)
190-
.context("Message encode")?;
233+
.context("couldn't encode messages")?;
191234

192235
let rrd_out_size = rrd_out.metadata().ok().map(|md| md.len());
193236

@@ -208,7 +251,7 @@ fn merge_and_compact(
208251
compaction_ratio,
209252
srcs = ?path_to_input_rrds,
210253
srcs_size_bytes = %file_size_to_string(rrds_in_size),
211-
"compaction finished"
254+
"merge/compaction finished"
212255
);
213256

214257
Ok(())

crates/top/rerun/src/commands/rrd/mod.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,16 @@ pub enum RrdCommands {
1919
/// This ignores the `log_time` timeline.
2020
Compare(CompareCommand),
2121

22-
/// Print the contents of one or more .rrd/.rbl files.
22+
/// Print the contents of one or more .rrd/.rbl files/streams.
23+
///
24+
/// Reads from standard input if no paths are specified.
2325
///
2426
/// Example: `rerun rrd print /my/recordings/*.rrd`
2527
Print(PrintCommand),
2628

27-
/// Compacts the contents of one or more .rrd/.rbl files and writes the result to a new file.
29+
/// Compacts the contents of one or more .rrd/.rbl files/streams and writes the result to a new file.
30+
///
31+
/// Reads from standard input if no paths are specified.
2832
///
2933
/// Uses the usual environment variables to control the compaction thresholds:
3034
/// `RERUN_CHUNK_MAX_ROWS`,
@@ -40,7 +44,9 @@ pub enum RrdCommands {
4044
/// * `rerun rrd compact --max-rows 4096 --max-bytes=1048576 /my/recordings/*.rrd -o output.rrd`
4145
Compact(CompactCommand),
4246

43-
/// Merges the contents of multiple .rrd/.rbl files, and writes the result to a new file.
47+
/// Merges the contents of multiple .rrd/.rbl files/streams, and writes the result to a new file.
48+
///
49+
/// Reads from standard input if no paths are specified.
4450
///
4551
/// This will not affect the chunking of the data in any way.
4652
///

0 commit comments

Comments
 (0)