Skip to content

Commit ac8ef01

Browse files
authored
Merge pull request #1 from CeresDB/remove-collect
remove unnecessary collect
2 parents 5dd9045 + 2a49129 commit ac8ef01

File tree

2 files changed

+77
-45
lines changed

2 files changed

+77
-45
lines changed

README.md

+19-15
Original file line numberDiff line numberDiff line change
@@ -22,24 +22,28 @@ or
2222

2323
```toml
2424
[dependencies]
25-
xorfilter-rs = { git = "https://github.com/bnclabs/xorfilter" }
25+
xorfilter-rs = { git = "https://github.com/prataprc/xorfilter" }
2626
```
2727

2828
```rust
29-
use xorfilter::Xor8;
30-
31-
let mut keys: Vec<u64> = vec![];
32-
for _ in 0..num_keys {
33-
keys.push(rng.gen());
34-
}
35-
36-
let mut filter = Xor8::new(); // new filter.
37-
filter.populate_keys(&keys); // populate keys.
38-
filter.build(); // build bitmap.
39-
40-
for key in 0..lookup {
41-
// there can be false positives, but no false negatives.
42-
filter.contains_key(key);
29+
use xorfilter::{xor8::Xor8Builder, BuildHasherDefault};
30+
31+
fn main() {
32+
let num_keys = 1000;
33+
let lookup = 10;
34+
let mut b = Xor8Builder::<BuildHasherDefault>::default();
35+
for i in 0..num_keys {
36+
if i % 2 == 0 {
37+
b.insert(&i);
38+
}
39+
}
40+
41+
// build bitmap.
42+
let filter = b.build().unwrap();
43+
for key in 0..lookup {
44+
// there can be false positives, but no false negatives.
45+
println!("{key}: {}", filter.contains(&key));
46+
}
4347
}
4448
```
4549

src/xor8/builder.rs

+58-30
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
use std::hash::BuildHasher;
2-
use std::hash::Hash;
3-
use std::hash::Hasher;
4-
use std::sync::Arc;
5-
6-
use crate::xor8::filter::fingerprint;
7-
use crate::xor8::filter::splitmix64;
8-
use crate::xor8::filter::XorSet;
9-
use crate::xor8::Xor8;
10-
use crate::BuildHasherDefault;
1+
use std::{
2+
hash::{BuildHasher, Hash, Hasher},
3+
sync::Arc,
4+
};
5+
6+
use crate::{
7+
xor8::{
8+
filter::{fingerprint, splitmix64, XorSet},
9+
Xor8,
10+
},
11+
BuildHasherDefault,
12+
};
1113

1214
#[derive(Clone, Copy, Default)]
1315
struct KeyIndex {
@@ -56,15 +58,17 @@ type U64HashSet = ::std::collections::HashSet<u64, U64IdentifyBuildHasher>;
5658
/// ```
5759
#[derive(Clone, Debug)]
5860
pub struct Xor8Builder<H = BuildHasherDefault>
59-
where H: BuildHasher + Clone
61+
where
62+
H: BuildHasher + Clone,
6063
{
6164
digests: U64HashSet,
6265
pub num_digests: usize,
6366
pub hash_builder: H,
6467
}
6568

6669
impl<H> Default for Xor8Builder<H>
67-
where H: BuildHasher + Clone + Default
70+
where
71+
H: BuildHasher + Clone + Default,
6872
{
6973
fn default() -> Self {
7074
Self {
@@ -76,11 +80,14 @@ where H: BuildHasher + Clone + Default
7680
}
7781

7882
impl<H> Xor8Builder<H>
79-
where H: BuildHasher + Clone
83+
where
84+
H: BuildHasher + Clone,
8085
{
8186
/// New Xor8 builder initialized with [BuildHasherDefault].
8287
pub fn new() -> Self
83-
where H: Default {
88+
where
89+
H: Default,
90+
{
8491
Self::default()
8592
}
8693

@@ -107,8 +114,8 @@ where H: BuildHasher + Clone
107114

108115
/// Insert 64-bit digest of a single key.
109116
///
110-
/// Digest for the key shall be generated using the default-hasher or via hasher
111-
/// supplied via [Xor8Builder::with_hasher] method.
117+
/// Digest for the key shall be generated using the default-hasher or via
118+
/// hasher supplied via [Xor8Builder::with_hasher] method.
112119
pub fn insert<K: ?Sized + Hash>(&mut self, key: &K) {
113120
let digest = self.hash(key);
114121

@@ -118,8 +125,8 @@ where H: BuildHasher + Clone
118125

119126
/// Populate with 64-bit digests for a collection of keys of type `K`.
120127
///
121-
/// Digest for key shall be generated using the default-hasher or via hasher supplied
122-
/// via [Xor8Builder::with_hasher] method.
128+
/// Digest for key shall be generated using the default-hasher or via hasher
129+
/// supplied via [Xor8Builder::with_hasher] method.
123130
pub fn populate<'i, K: Hash + 'i, I: IntoIterator<Item = &'i K>>(&mut self, keys: I) {
124131
let mut n = 0;
125132

@@ -145,28 +152,49 @@ where H: BuildHasher + Clone
145152
self.num_digests += n;
146153
}
147154

148-
/// Build bitmap for keys that where previously inserted using [Xor8Builder::insert],
149-
/// [Xor8Builder::populate] and [Xor8Builder::populate_digests] method.
155+
/// Build bitmap for keys that were previously inserted using
156+
/// [Xor8Builder::insert], [Xor8Builder::populate] and
157+
/// [Xor8Builder::populate_digests] method.
150158
pub fn build(&mut self) -> Result<Xor8<H>, crate::Error> {
151-
let digests = self.digests.iter().copied().collect::<Vec<u64>>();
152-
self.build_from_digests(&digests)
159+
let digest_len = self.digests.len();
160+
Self::build_inner(
161+
self.hash_builder.clone(),
162+
self.digests.iter().copied(),
163+
digest_len,
164+
)
153165
}
154166

155167
/// Build a bitmap for pre-computed 64-bit digests for keys.
156168
///
157169
/// If keys were previously inserted using [Xor8Builder::insert] or
158-
/// [Xor8Builder::populate] or [Xor8Builder::populate_digests] methods, they shall be
159-
/// ignored.
170+
/// [Xor8Builder::populate] or [Xor8Builder::populate_digests] methods, they
171+
/// shall be ignored.
160172
///
161-
/// It is upto the caller to ensure that digests are unique, that there no duplicates.
173+
/// It is up to the caller to ensure that digests are unique, that there are no
174+
/// duplicates.
162175
pub fn build_from_digests(
163176
&mut self,
164177
digests: &[u64],
165178
) -> Result<Xor8<H>, crate::Error> {
166-
let mut ff = Xor8::<H>::new(self.hash_builder.clone());
179+
Self::build_inner(
180+
self.hash_builder.clone(),
181+
digests.iter().copied(),
182+
digests.len(),
183+
)
184+
}
167185

168-
ff.num_keys = Some(digests.len());
169-
let (size, mut rngcounter) = (digests.len(), 1_u64);
186+
fn build_inner<It>(
187+
hash_builder: H,
188+
digest_iter: It,
189+
digest_len: usize,
190+
) -> Result<Xor8<H>, crate::Error>
191+
where
192+
It: Iterator<Item = u64> + Clone,
193+
{
194+
let mut ff = Xor8::<H>::new(hash_builder);
195+
196+
ff.num_keys = Some(digest_len);
197+
let (size, mut rngcounter) = (digest_len, 1_u64);
170198
let capacity = {
171199
let capacity = 32 + ((1.23 * (size as f64)).ceil() as u32);
172200
capacity / 3 * 3 // round it down to a multiple of 3
@@ -185,8 +213,8 @@ where H: BuildHasher + Clone
185213
let mut sets2: Vec<XorSet> = vec![XorSet::default(); block_length];
186214

187215
loop {
188-
for key in digests.iter() {
189-
let hs = ff.get_h0h1h2(*key);
216+
for key in digest_iter.clone() {
217+
let hs = ff.get_h0h1h2(key);
190218
sets0[hs.h0 as usize].xor_mask ^= hs.h;
191219
sets0[hs.h0 as usize].count += 1;
192220
sets1[hs.h1 as usize].xor_mask ^= hs.h;

0 commit comments

Comments
 (0)