add some fn

Signed-off-by: feathercyc <feathercyc@163.com>
xline-kv · Jul 4, 2024 · 4af029c · 4af029c
1 parent 18afea8
commit 4af029c
Show file tree

Hide file tree

Showing 14 changed files with 2,075 additions and 474 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -8,14 +8,20 @@ license = "Apache-2.0"
 keywords = ["Interval Tree", "Augmented Tree", "Red-Black Tree"]
 
 [dependencies]
+serde = { version = "1.0", default-features = false, features = [
+    "derive",
+    "std",
+], optional = true }
 
 [dev-dependencies]
 criterion = "0.5.1"
 rand = "0.8.5"
+serde_json = "1.0"
 
 [features]
 default = []
-interval_tree_find_overlap_ordered = []
+graphviz = []
+serde = ["dep:serde"]
 
 [[bench]]
 name = "bench"

diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ The implementation of the interval tree in interval_map references "Introduction
 To safely and efficiently handle insertion and deletion operations in Rust, `interval_map` innovatively **uses arrays to simulate pointers** for managing the parent-child references in the red-black tree. This approach also ensures that interval_map has the `Send` and `Unpin` traits, allowing it to be safely transferred between threads and to maintain a fixed memory location during asynchronous operations.
 
 `interval_map` implements an `IntervalMap` struct:
-- It accepts `Interval<T>` as the key, where `T` can be any type that implements `Ord+Clone` trait. Therefore, intervals such as $[1, 2)$ and $["aaa", "bbb")$ are allowed
+- It accepts `Interval<T>` as the key, where `T` can be any type that implements the `Ord` trait. Therefore, intervals such as $[1, 2)$ and $["aaa", "bbb")$ are allowed
 - The value can be of any type
 
 `interval_map` supports `insert`, `delete`, and `iter` fns. Traversal is performed in the order of `Interval<T>` . For instance, with intervals of type `Interval<u32>`:
@@ -22,15 +22,16 @@ Currently, `interval_map` only supports half-open intervals, i.e., $[...,...)$.
 
 The benchmark was conducted on a platform with `AMD R7 7840H + DDR5 5600MHz`. The results are as follows:
 1. Only insert
-    | insert          | 100       | 1000      | 10, 000   | 100, 000  |
-    | --------------- | --------- | --------- | --------- | --------- |
-    | Time per insert | 5.4168 µs | 80.518 µs | 2.2823 ms | 36.528 ms |
+    | insert     | 100       | 1000      | 10, 000   | 100, 000  |
+    | ---------- | --------- | --------- | --------- | --------- |
+    | Total time | 5.4168 µs | 80.518 µs | 2.2823 ms | 36.528 ms |
 2. Insert N and remove N
-    | insert_and_remove  | 100       | 1000      | 10, 000   | 100, 000  |
-    | ------------------ | --------- | --------- | --------- | --------- |
-    | Time per operation | 10.333 µs | 223.43 µs | 4.9358 ms | 81.634 ms |
+    | insert_and_remove | 100       | 1000      | 10, 000   | 100, 000  |
+    | ----------------- | --------- | --------- | --------- | --------- |
+    | Total time        | 10.333 µs | 223.43 µs | 4.9358 ms | 81.634 ms |
 
 ## TODO
-- [] Support for $(...,...)$, $[...,...]$ and $(...,...]$ interval types.
-- [] Add more tests like [etcd](https://github.com/etcd-io/etcd/blob/main/pkg/adt/interval_tree_test.go)
-- [] Add Point type for Interval
+- [ ] ~~Support for $(...,...)$, $[...,...]$ and $(...,...]$ interval types.~~ There is currently no way to support these interval types without a performance loss.
+- [ ] ~~Add Point type for Interval~~ Supporting a Point type would also require supporting $[...,...]$, so it cannot be supported for now either. However, you can write code like [examples/new_point](examples/new_point.rs).
+- [x] Add more tests like [etcd](https://github.com/etcd-io/etcd/blob/main/pkg/adt/interval_tree_test.go).
+- [x] Refine iter mod.
diff --git a/benches/bench.rs b/benches/bench.rs
@@ -100,14 +100,69 @@ fn bench_interval_map_insert_remove(c: &mut Criterion) {
     });
 }
 
+// filter_iter helper fn
+fn interval_map_filter_iter(count: usize, bench: &mut Bencher) {
+    let mut gen = IntervalGenerator::new();
+    let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect();
+    let mut map = IntervalMap::new();
+    for i in intervals.clone() {
+        map.insert(i, ());
+    }
+    bench.iter(|| {
+        for i in intervals.clone() {
+            black_box(map.filter_iter(&i).collect::<Vec<_>>());
+        }
+    });
+}
+
+// iter().filter() helper fn
+fn interval_map_iter_filter(count: usize, bench: &mut Bencher) {
+    let mut gen = IntervalGenerator::new();
+    let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect();
+    let mut map = IntervalMap::new();
+    for i in intervals.clone() {
+        map.insert(i, ());
+    }
+    bench.iter(|| {
+        for i in intervals.clone() {
+            black_box(map.iter().filter(|v| v.0.overlap(&i)).collect::<Vec<_>>());
+        }
+    });
+}
+
+fn bench_interval_map_filter_iter(c: &mut Criterion) {
+    c.bench_function("bench_interval_map_filter_iter_100", |b| {
+        interval_map_filter_iter(100, b)
+    });
+    c.bench_function("bench_interval_map_filter_iter_1000", |b| {
+        interval_map_filter_iter(1000, b)
+    });
+}
+
+fn bench_interval_map_iter_filter(c: &mut Criterion) {
+    c.bench_function("bench_interval_map_iter_filter_100", |b| {
+        interval_map_iter_filter(100, b)
+    });
+    c.bench_function("bench_interval_map_iter_filter_1000", |b| {
+        interval_map_iter_filter(1000, b)
+    });
+}
+
 fn criterion_config() -> Criterion {
     Criterion::default().configure_from_args().without_plots()
 }
 
 criterion_group! {
-    name = benches;
+    name = benches_basic_op;
+    config = criterion_config();
+    targets = bench_interval_map_insert, bench_interval_map_insert_remove,
+}
+
+criterion_group! {
+    name = benches_iter;
     config = criterion_config();
-    targets = bench_interval_map_insert, bench_interval_map_insert_remove
+    targets = bench_interval_map_filter_iter, bench_interval_map_iter_filter
 }
 
-criterion_main!(benches);
+// criterion_main!(benches_basic_op, benches_iter);
+criterion_main!(benches_iter);
diff --git a/examples/new_point.rs b/examples/new_point.rs
@@ -0,0 +1,27 @@
+use interval_map::{Interval, IntervalMap};
+
+trait Point<T> {
+    fn new_point(x: T) -> Interval<T>;
+}
+
+impl Point<u32> for Interval<u32> {
+    fn new_point(x: u32) -> Self {
+        Interval::new(x, x + 1)
+    }
+}
+
+fn main() {
+    let mut interval_map = IntervalMap::<u32, i32>::new();
+    interval_map.insert(Interval::new(3, 7), 20);
+    interval_map.insert(Interval::new(2, 6), 15);
+
+    let tmp_point = Interval::new_point(5);
+    assert_eq!(tmp_point, Interval::new(5, 6));
+
+    interval_map.insert(tmp_point.clone(), 10);
+    assert_eq!(interval_map.get(&tmp_point).unwrap(), &10);
+    assert_eq!(
+        interval_map.find_all_overlap(&Interval::new_point(5)).len(),
+        3
+    );
+}
diff --git a/examples/string_affine.rs b/examples/string_affine.rs
@@ -0,0 +1,73 @@
+use std::cmp;
+
+use interval_map::{Interval, IntervalMap};
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum StringAffine {
+    /// String
+    String(String),
+    /// Unbounded
+    Unbounded,
+}
+
+impl StringAffine {
+    pub fn new_key(s: &str) -> Self {
+        Self::String(s.to_string())
+    }
+
+    pub fn new_unbounded() -> Self {
+        Self::Unbounded
+    }
+}
+
+impl PartialOrd for StringAffine {
+    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+        match (self, other) {
+            (StringAffine::String(x), StringAffine::String(y)) => x.partial_cmp(y),
+            (StringAffine::String(_), StringAffine::Unbounded) => Some(cmp::Ordering::Less),
+            (StringAffine::Unbounded, StringAffine::String(_)) => Some(cmp::Ordering::Greater),
+            (StringAffine::Unbounded, StringAffine::Unbounded) => Some(cmp::Ordering::Equal),
+        }
+    }
+}
+
+impl Ord for StringAffine {
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        match (self, other) {
+            (StringAffine::String(x), StringAffine::String(y)) => x.cmp(y),
+            (StringAffine::String(_), StringAffine::Unbounded) => cmp::Ordering::Less,
+            (StringAffine::Unbounded, StringAffine::String(_)) => cmp::Ordering::Greater,
+            (StringAffine::Unbounded, StringAffine::Unbounded) => cmp::Ordering::Equal,
+        }
+    }
+}
+
+trait Point<T> {
+    fn new_point(x: T) -> Interval<T>;
+}
+
+impl Point<StringAffine> for Interval<StringAffine> {
+    fn new_point(x: StringAffine) -> Interval<StringAffine> {
+        match x {
+            StringAffine::String(mut x_string) => {
+                let low = x_string.clone();
+                x_string.push('\0');
+                Interval::new(
+                    StringAffine::new_key(&low),
+                    StringAffine::new_key(&x_string),
+                )
+            }
+            _ => panic!("new_point only receive StringAffine::String!"),
+        }
+    }
+}
+
+fn main() {
+    let mut interval_map = IntervalMap::<StringAffine, u32>::new();
+    interval_map.insert(
+        Interval::new(StringAffine::new_key("8"), StringAffine::Unbounded),
+        123,
+    );
+    assert!(interval_map.overlap(&Interval::new_point(StringAffine::new_key("9"))));
+    assert!(!interval_map.overlap(&Interval::new_point(StringAffine::new_key("7"))));
+}
diff --git a/src/entry.rs b/src/entry.rs
@@ -5,7 +5,10 @@ use crate::node::Node;
 
 /// A view into a single entry in a map, which may either be vacant or occupied.
 #[derive(Debug)]
-pub enum Entry<'a, T, V, Ix> {
+pub enum Entry<'a, T, V, Ix>
+where
+    T: Ord,
+{
     /// An occupied entry.
     Occupied(OccupiedEntry<'a, T, V, Ix>),
     /// A vacant entry.
@@ -15,17 +18,23 @@ pub enum Entry<'a, T, V, Ix> {
 /// A view into an occupied entry in a `IntervalMap`.
 /// It is part of the [`Entry`] enum.
 #[derive(Debug)]
-pub struct OccupiedEntry<'a, T, V, Ix> {
+pub struct OccupiedEntry<'a, T, V, Ix>
+where
+    T: Ord,
+{
     /// Reference to the map
     pub map_ref: &'a mut IntervalMap<T, V, Ix>,
     /// The entry node
-    pub node: NodeIndex<Ix>,
+    pub node_idx: NodeIndex<Ix>,
 }
 
 /// A view into a vacant entry in a `IntervalMap`.
 /// It is part of the [`Entry`] enum.
 #[derive(Debug)]
-pub struct VacantEntry<'a, T, V, Ix> {
+pub struct VacantEntry<'a, T, V, Ix>
+where
+    T: Ord,
+{
     /// Mutable reference to the map
     pub map_ref: &'a mut IntervalMap<T, V, Ix>,
     /// The interval of this entry
@@ -53,7 +62,7 @@ where
     #[inline]
     pub fn or_insert(self, default: V) -> &'a mut V {
         match self {
-            Entry::Occupied(entry) => entry.map_ref.node_mut(entry.node, Node::value_mut),
+            Entry::Occupied(entry) => entry.map_ref.node_mut(entry.node_idx, Node::value_mut),
             Entry::Vacant(entry) => {
                 let entry_idx = NodeIndex::new(entry.map_ref.nodes.len());
                 let _ignore = entry.map_ref.insert(entry.interval, default);
@@ -88,7 +97,7 @@ where
     {
         match self {
             Entry::Occupied(entry) => {
-                f(entry.map_ref.node_mut(entry.node, Node::value_mut));
+                f(entry.map_ref.node_mut(entry.node_idx, Node::value_mut));
                 Self::Occupied(entry)
             }
             Entry::Vacant(entry) => Self::Vacant(entry),

diff --git a/src/index.rs b/src/index.rs
@@ -1,30 +1,49 @@
 use std::fmt;
 use std::hash::Hash;
 
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+
 pub type DefaultIx = u32;
 
-pub unsafe trait IndexType: Copy + Default + Hash + Ord + fmt::Debug + 'static {
+pub trait IndexType: Copy + Default + Hash + Ord + fmt::Debug + 'static {
+    const SENTINEL: Self;
     fn new(x: usize) -> Self;
     fn index(&self) -> usize;
     fn max() -> Self;
+    fn is_sentinel(&self) -> bool {
+        *self == Self::SENTINEL
+    }
 }
 
-unsafe impl IndexType for u32 {
-    #[inline(always)]
-    fn new(x: usize) -> Self {
-        x as u32
-    }
-    #[inline(always)]
-    fn index(&self) -> usize {
-        *self as usize
-    }
-    #[inline(always)]
-    fn max() -> Self {
-        ::std::u32::MAX
-    }
+macro_rules! impl_index {
+    ($type:ident) => {
+        impl IndexType for $type {
+            const SENTINEL: Self = 0;
+
+            #[inline(always)]
+            fn new(x: usize) -> Self {
+                x as $type
+            }
+            #[inline(always)]
+            fn index(&self) -> usize {
+                *self as usize
+            }
+            #[inline(always)]
+            fn max() -> Self {
+                ::std::$type::MAX
+            }
+        }
+    };
 }
 
+impl_index!(u8);
+impl_index!(u16);
+impl_index!(u32);
+impl_index!(u64);
+
 /// Node identifier.
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Copy, Clone, Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
 pub struct NodeIndex<Ix = DefaultIx>(Ix);
 
@@ -34,24 +53,30 @@ impl<Ix: IndexType> NodeIndex<Ix> {
         NodeIndex(IndexType::new(x))
     }
 
-    #[inline]
-    pub fn index(self) -> usize {
-        self.0.index()
-    }
-
     #[inline]
     pub fn end() -> Self {
         NodeIndex(IndexType::max())
     }
+
+    pub fn incre(&self) -> Self {
+        NodeIndex::new(self.index().wrapping_add(1))
+    }
 }
 
-unsafe impl<Ix: IndexType> IndexType for NodeIndex<Ix> {
+impl<Ix: IndexType> IndexType for NodeIndex<Ix> {
+    const SENTINEL: Self = NodeIndex(Ix::SENTINEL);
+
+    #[inline]
     fn index(&self) -> usize {
         self.0.index()
     }
+
+    #[inline]
     fn new(x: usize) -> Self {
         NodeIndex::new(x)
     }
+
+    #[inline]
     fn max() -> Self {
         NodeIndex(<Ix as IndexType>::max())
     }