Skip to content

Commit 4bc05e5

Browse files
authored
Fix quadratic slowdown when ingesting data with uniform time (#3088)
### What

* Closes #3086
* Closes #433

This should also speed up data insertion overall for the common case of already-sorted data.

### Checklist

* [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested [demo.rerun.io](https://demo.rerun.io/pr/3088) (if applicable)

- [PR Build Summary](https://build.rerun.io/pr/3088)
- [Docs preview](https://rerun.io/preview/e5adb1aa580de2274b4eca9f6c5de38ae503b521/docs) <!--DOCS-PREVIEW-->
- [Examples preview](https://rerun.io/preview/e5adb1aa580de2274b4eca9f6c5de38ae503b521/examples) <!--EXAMPLES-PREVIEW--><!--EXAMPLES-PREVIEW-->
- [Recent benchmark results](https://ref.rerun.io/dev/bench/)
- [Wasm size tracking](https://ref.rerun.io/dev/sizes/)
1 parent 6db856a commit 4bc05e5

File tree

1 file changed

+21
-11
lines changed

1 file changed

+21
-11
lines changed

crates/re_arrow_store/src/store_write.rs

+21-11
Original file line numberDiff line numberDiff line change
@@ -380,15 +380,21 @@ impl IndexedTable {
380380
}
381381
}
382382

383-
debug!(
384-
kind = "insert",
385-
timeline = %timeline.name(),
386-
time = timeline.typ().format(time),
387-
entity = %ent_path,
388-
len_limit = config.indexed_bucket_num_rows,
389-
len, len_overflow,
390-
"couldn't split indexed bucket, proceeding to ignore limits"
383+
let bucket_time_range = bucket.inner.read().time_range;
384+
385+
re_log::debug_once!(
386+
"Failed to split bucket on timeline {}",
387+
bucket.timeline.format_time_range(&bucket_time_range)
391388
);
389+
390+
if bucket_time_range.min == bucket_time_range.max {
391+
re_log::warn_once!(
392+
"Found over {} rows with the same timepoint {:?}={} - perhaps you forgot to update or remove the timeline?",
393+
config.indexed_bucket_num_rows,
394+
bucket.timeline.name(),
395+
bucket.timeline.typ().format(bucket_time_range.min)
396+
);
397+
}
392398
}
393399

394400
trace!(
@@ -437,6 +443,13 @@ impl IndexedBucket {
437443
} = &mut *inner;
438444

439445
// append time to primary column and update time range appropriately
446+
447+
if let Some(last_time) = col_time.last() {
448+
if time.as_i64() < *last_time {
449+
*is_sorted = false;
450+
}
451+
}
452+
440453
col_time.push(time.as_i64());
441454
*time_range = TimeRange::new(time_range.min.min(time), time_range.max.max(time));
442455
size_bytes_added += time.as_i64().total_size_bytes();
@@ -495,9 +508,6 @@ impl IndexedBucket {
495508
}
496509
}
497510

498-
// TODO(#433): re_datastore: properly handle already sorted data during insertion
499-
*is_sorted = false;
500-
501511
*size_bytes += size_bytes_added;
502512

503513
#[cfg(debug_assertions)]

0 commit comments

Comments
 (0)