Skip to content

Commit c7ea722

Browse files
emilk authored and jleibs committed
Fix quadratic slowdown when ingesting data with uniform time (#3088)
### What

* Closes #3086
* Closes #433

This should also overall just speed up data insertion for the common case of already-sorted data.

### Checklist

* [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested [demo.rerun.io](https://demo.rerun.io/pr/3088) (if applicable)

- [PR Build Summary](https://build.rerun.io/pr/3088)
- [Docs preview](https://rerun.io/preview/e5adb1aa580de2274b4eca9f6c5de38ae503b521/docs) <!--DOCS-PREVIEW-->
- [Examples preview](https://rerun.io/preview/e5adb1aa580de2274b4eca9f6c5de38ae503b521/examples) <!--EXAMPLES-PREVIEW--><!--EXAMPLES-PREVIEW-->
- [Recent benchmark results](https://ref.rerun.io/dev/bench/)
- [Wasm size tracking](https://ref.rerun.io/dev/sizes/)
1 parent f08e570 commit c7ea722

File tree

1 file changed

+21
-11
lines changed

1 file changed

+21
-11
lines changed

crates/re_arrow_store/src/store_write.rs

+21 -11
Original file line number | Diff line number | Diff line change
@@ -376,15 +376,21 @@ impl IndexedTable {
376376
}
377377
}
378378

379-
debug!(
380-
kind = "insert",
381-
timeline = %timeline.name(),
382-
time = timeline.typ().format(time),
383-
entity = %ent_path,
384-
len_limit = config.indexed_bucket_num_rows,
385-
len, len_overflow,
386-
"couldn't split indexed bucket, proceeding to ignore limits"
379+
let bucket_time_range = bucket.inner.read().time_range;
380+
381+
re_log::debug_once!(
382+
"Failed to split bucket on timeline {}",
383+
bucket.timeline.format_time_range(&bucket_time_range)
387384
);
385+
386+
if bucket_time_range.min == bucket_time_range.max {
387+
re_log::warn_once!(
388+
"Found over {} rows with the same timepoint {:?}={} - perhaps you forgot to update or remove the timeline?",
389+
config.indexed_bucket_num_rows,
390+
bucket.timeline.name(),
391+
bucket.timeline.typ().format(bucket_time_range.min)
392+
);
393+
}
388394
}
389395

390396
trace!(
@@ -433,6 +439,13 @@ impl IndexedBucket {
433439
} = &mut *inner;
434440

435441
// append time to primary column and update time range appropriately
442+
443+
if let Some(last_time) = col_time.last() {
444+
if time.as_i64() < *last_time {
445+
*is_sorted = false;
446+
}
447+
}
448+
436449
col_time.push(time.as_i64());
437450
*time_range = TimeRange::new(time_range.min.min(time), time_range.max.max(time));
438451
size_bytes_added += time.as_i64().total_size_bytes();
@@ -491,9 +504,6 @@ impl IndexedBucket {
491504
}
492505
}
493506

494-
// TODO(#433): re_datastore: properly handle already sorted data during insertion
495-
*is_sorted = false;
496-
497507
*size_bytes += size_bytes_added;
498508

499509
#[cfg(debug_assertions)]

0 commit comments

Comments
 (0)