Squashed commit of the following:

teh-cmc · teh-cmc · commit 8b9c14fe5761 · 2023-04-01T16:19:09.000+02:00
commit ef2b5dc Author: Clement Rey <cr.rey.clement@gmail.com> Date: Sat Apr 1 16:15:11 2023 +0200 benchmarks for common vector ops across `smallvec`/`tinyvec`/std (#1747) * benchmarks for common vector ops * handle N=1 commit 731d941 Author: benjamin de charmoy <BenjaminDev@users.noreply.github.com> Date: Sat Apr 1 10:57:19 2023 +0200 Fix logged obb being displayed with half of the requested size (#1749) commit 1e84aa5 Author: Clement Rey <cr.rey.clement@gmail.com> Date: Fri Mar 31 17:43:57 2023 +0200 `arrow2_convert` primitive (de)serialization benchmarks (#1742) * arrow2_convert primitive benchmarks * addressing PR comments
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/re_arrow_store/Cargo.toml b/crates/re_arrow_store/Cargo.toml
@@ -81,6 +81,8 @@ polars-core = { workspace = true, features = [
   "sort_multiple",
 ] }
 rand = "0.8"
+smallvec = { version = "1.0", features = ["const_generics", "union"] }
+tinyvec = { version = "1.6", features = ["alloc", "rustc_1_55"] }
 
 
 [lib]
@@ -119,3 +121,7 @@ harness = false
 [[bench]]
 name = "arrow2_convert"
 harness = false
+
+[[bench]]
+name = "vectors"
+harness = false
diff --git a/crates/re_arrow_store/benches/vectors.rs b/crates/re_arrow_store/benches/vectors.rs
@@ -0,0 +1,329 @@
+//! Keeping track of performance issues/regressions for common vector operations.
+
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use smallvec::SmallVec;
+use tinyvec::TinyVec;
+
+criterion_group!(benches, sort, split, swap, swap_opt);
+criterion_main!(benches);
+
+// ---
+
+#[cfg(not(debug_assertions))]
+const NUM_INSTANCES: usize = 10_000;
+#[cfg(not(debug_assertions))]
+const SMALLVEC_SIZE: usize = 4;
+
+// `cargo test` also runs the benchmark setup code, so make sure they run quickly:
+#[cfg(debug_assertions)]
+const NUM_INSTANCES: usize = 1;
+#[cfg(debug_assertions)]
+const SMALLVEC_SIZE: usize = 1;
+
+// --- Benchmarks ---
+
+fn split(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!("vector_ops/split_off/instances={NUM_INSTANCES}"));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        fn split_off<T: Copy, const N: usize>(
+            data: &mut SmallVec<[T; N]>,
+            split_idx: usize,
+        ) -> SmallVec<[T; N]> {
+            if split_idx >= data.len() {
+                return SmallVec::default();
+            }
+
+            let second_half = SmallVec::from_slice(&data[split_idx..]);
+            data.truncate(split_idx);
+            second_half
+        }
+
+        let data: SmallVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function(format!("smallvec/n={SMALLVEC_SIZE}/manual"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = split_off(&mut data, NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+
+    {
+        let data: TinyVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = data.split_off(NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+
+    {
+        fn split_off<T: Default + Copy, const N: usize>(
+            data: &mut TinyVec<[T; N]>,
+            split_idx: usize,
+        ) -> TinyVec<[T; N]> {
+            if split_idx >= data.len() {
+                return TinyVec::default();
+            }
+
+            let second_half = TinyVec::from(&data[split_idx..]);
+            data.truncate(split_idx);
+            second_half
+        }
+
+        let data: TinyVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}/manual"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = split_off(&mut data, NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+
+    {
+        let data: Vec<i64> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function("vec", |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = data.split_off(NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+
+    {
+        fn split_off<T: Copy>(data: &mut Vec<T>, split_idx: usize) -> Vec<T> {
+            if split_idx >= data.len() {
+                return Vec::default();
+            }
+
+            let second_half = Vec::from(&data[split_idx..]);
+            data.truncate(split_idx);
+            second_half
+        }
+
+        let data: Vec<i64> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function("vec/manual", |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = split_off(&mut data, NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+}
+
+fn sort(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!("vector_ops/sort/instances={NUM_INSTANCES}"));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        let data: SmallVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).rev().collect();
+
+        group.bench_function(format!("smallvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                data.sort_unstable();
+                assert_eq!(NUM_INSTANCES, data.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, data[NUM_INSTANCES / 2]);
+                data
+            });
+        });
+    }
+
+    {
+        let data: TinyVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).rev().collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                data.sort_unstable();
+                assert_eq!(NUM_INSTANCES, data.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, data[NUM_INSTANCES / 2]);
+                data
+            });
+        });
+    }
+
+    {
+        let data: Vec<i64> = (0..NUM_INSTANCES as i64).rev().collect();
+
+        group.bench_function("vec", |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                data.sort_unstable();
+                assert_eq!(NUM_INSTANCES, data.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, data[NUM_INSTANCES / 2]);
+                data
+            });
+        });
+    }
+}
+
+fn swap(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!("vector_ops/swap/instances={NUM_INSTANCES}"));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        let data: SmallVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+        let swaps: SmallVec<[usize; SMALLVEC_SIZE]> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function(format!("smallvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap];
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    (NUM_INSTANCES as i64 / 2).max(1) - 1,
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+
+    {
+        let data: TinyVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+        let swaps: TinyVec<[usize; SMALLVEC_SIZE]> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap];
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    (NUM_INSTANCES as i64 / 2).max(1) - 1,
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+
+    {
+        let data: Vec<i64> = (0..NUM_INSTANCES as i64).collect();
+        let swaps: Vec<usize> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function("vec", |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap];
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    (NUM_INSTANCES as i64 / 2).max(1) - 1,
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+}
+
+fn swap_opt(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!("vector_ops/swap_opt/instances={NUM_INSTANCES}"));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        let data: SmallVec<[Option<i64>; SMALLVEC_SIZE]> =
+            (0..NUM_INSTANCES as i64).map(Some).collect();
+        let swaps: SmallVec<[usize; SMALLVEC_SIZE]> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function(format!("smallvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let mut data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap].take();
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    Some((NUM_INSTANCES as i64 / 2).max(1) - 1),
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+
+    {
+        let data: TinyVec<[Option<i64>; SMALLVEC_SIZE]> =
+            (0..NUM_INSTANCES as i64).map(Some).collect();
+        let swaps: TinyVec<[usize; SMALLVEC_SIZE]> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let mut data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap].take();
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    Some((NUM_INSTANCES as i64 / 2).max(1) - 1),
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+
+    {
+        let data: Vec<Option<i64>> = (0..NUM_INSTANCES as i64).map(Some).collect();
+        let swaps: Vec<usize> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function("vec", |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let mut data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap].take();
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    Some((NUM_INSTANCES as i64 / 2).max(1) - 1),
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+}