-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 92a8df9
Showing
19 changed files
with
21,674 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
/target | ||
/Cargo.lock | ||
/bench_results | ||
/out | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
[package] | ||
name = "foldhash" | ||
version = "0.1.0" | ||
authors = ["Orson Peters <orsonpeters@gmail.com>"] | ||
license = "Zlib" | ||
repository = "https://github.com/orlp/foldhash" | ||
readme = "README.md" | ||
keywords = ["hash", "hasher", "no-std"] | ||
categories = ["algorithms", "no-std"] | ||
description = "A fast, non-cryptographic, minimally DoS-resistant hashing algorithm." | ||
edition = "2021" | ||
exclude = ["benches", "tools", "assets"] | ||
|
||
[features] | ||
default = ["std"] | ||
std = [] | ||
|
||
[dependencies] | ||
|
||
[dev-dependencies] | ||
criterion = "0.5" | ||
hashbrown = "0.14" | ||
uuid = "1.8" | ||
rand = "0.8" | ||
ahash = "0.8" | ||
fxhash = "0.2" | ||
chrono = "0.4" | ||
|
||
[lib] | ||
bench = false | ||
|
||
[[bench]] | ||
name = "bench" | ||
path = "benches/bench.rs" | ||
harness = false | ||
|
||
[[bench]] | ||
name = "avalanche" | ||
path = "benches/avalanche.rs" | ||
harness = false | ||
|
||
[profile.release] | ||
lto = "thin" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
Copyright (c) 2024 Orson Peters | ||
|
||
This software is provided 'as-is', without any express or implied warranty. In | ||
no event will the authors be held liable for any damages arising from the use of | ||
this software. | ||
|
||
Permission is granted to anyone to use this software for any purpose, including | ||
commercial applications, and to alter it and redistribute it freely, subject to | ||
the following restrictions: | ||
|
||
1. The origin of this software must not be misrepresented; you must not claim | ||
that you wrote the original software. If you use this software in a product, | ||
an acknowledgment in the product documentation would be appreciated but is | ||
not required. | ||
|
||
2. Altered source versions must be plainly marked as such, and must not be | ||
misrepresented as being the original software. | ||
|
||
3. This notice may not be removed or altered from any source distribution. |
Large diffs are not rendered by default.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
use std::hash::BuildHasher; | ||
|
||
use rand::prelude::*; | ||
|
||
fn compute_u64_avalanche<H: BuildHasher, F: FnMut() -> H>( | ||
num_hashers: usize, | ||
iters_per_hasher: usize, | ||
mut new_hasher: F, | ||
) -> Vec<f64> { | ||
let mut rng = thread_rng(); | ||
let mut worst_bias = vec![0.5f64; 64 * 64]; | ||
for _ in 0..num_hashers { | ||
let h = new_hasher(); | ||
let mut bit_flips = vec![0; 64 * 64]; | ||
for _ in 0..iters_per_hasher { | ||
let base_val: u64 = rng.gen(); | ||
let base_hash = h.hash_one(base_val); | ||
for flip_pos in 0..64 { | ||
let delta_val = base_val ^ (1 << flip_pos); | ||
let delta_hash = h.hash_one(delta_val); | ||
|
||
for test_pos in 0..64 { | ||
let flipped = ((base_hash ^ delta_hash) >> test_pos) & 1; | ||
bit_flips[test_pos * 64 + flip_pos] += flipped as usize; | ||
} | ||
} | ||
} | ||
|
||
for i in 0..64 * 64 { | ||
let flip_frac = bit_flips[i] as f64 / iters_per_hasher as f64; | ||
if (flip_frac - 0.5).abs() > (worst_bias[i] - 0.5).abs() { | ||
worst_bias[i] = flip_frac; | ||
} | ||
} | ||
} | ||
|
||
worst_bias | ||
} | ||
|
||
fn write_avalanche_csv<H: BuildHasher, F: FnMut() -> H>(name: &str, new_hasher: F) { | ||
println!("calculating avalanche properties of {name}"); | ||
let strings: Vec<String> = compute_u64_avalanche(10000, 1000, new_hasher) | ||
.into_iter() | ||
.map(|b| format!("{b}")) | ||
.collect(); | ||
std::fs::create_dir_all("out").unwrap(); | ||
std::fs::write(format!("out/avalanche-{name}.csv"), strings.join(",")).unwrap(); | ||
} | ||
|
||
fn main() { | ||
write_avalanche_csv("foldhash-fast", || foldhash::fast::RandomState::default()); | ||
write_avalanche_csv("foldhash-quality", || { | ||
foldhash::quality::RandomState::default() | ||
}); | ||
write_avalanche_csv("siphash", || std::hash::RandomState::default()); | ||
write_avalanche_csv("ahash", || ahash::RandomState::default()); | ||
write_avalanche_csv("fxhash", || fxhash::FxBuildHasher::default()); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
use criterion::measurement::WallTime; | ||
use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion}; | ||
use rand::prelude::*; | ||
use std::collections::{HashMap, HashSet}; | ||
use std::hash::BuildHasher; | ||
use std::hint::black_box; | ||
use std::time::Duration; | ||
|
||
const NUM_PRECOMPUTED_KEYS: usize = 1024; | ||
|
||
mod distribution; | ||
use distribution::Distribution; | ||
|
||
fn profile_hashonly<S: BuildHasher + Default, D: Distribution>( | ||
hash_name: &str, | ||
mut distr: D, | ||
c: &mut BenchmarkGroup<'_, WallTime>, | ||
) { | ||
let name = format!("hashonly-{}-{hash_name}", distr.name().to_lowercase()); | ||
let mut rng = StdRng::seed_from_u64(0x123456789abcdef); | ||
|
||
let hasher = S::default(); | ||
|
||
c.bench_function(&name, |b| { | ||
b.iter_custom(|iters| { | ||
let to_hash: Vec<_> = black_box( | ||
(0..NUM_PRECOMPUTED_KEYS) | ||
.map(|_| distr.sample(&mut rng)) | ||
.collect(), | ||
); | ||
let start = std::time::Instant::now(); | ||
for i in 0..iters as usize { | ||
black_box(hasher.hash_one(&to_hash[i % NUM_PRECOMPUTED_KEYS])); | ||
} | ||
start.elapsed() | ||
}); | ||
}); | ||
} | ||
|
||
fn profile_lookup_hit<S: BuildHasher + Default, D: Distribution>( | ||
hash_name: &str, | ||
mut distr: D, | ||
map_size: usize, | ||
c: &mut BenchmarkGroup<'_, WallTime>, | ||
) { | ||
let name = format!("lookuphit-{}-{hash_name}", distr.name().to_lowercase()); | ||
let mut rng = StdRng::seed_from_u64(0x123456789abcdef); | ||
|
||
c.bench_function(&name, |b| { | ||
b.iter_custom(|iters| { | ||
let mut hm: HashMap<D::Value, u64, S> = HashMap::with_hasher(S::default()); | ||
for i in 0..map_size { | ||
hm.insert(distr.sample(&mut rng), i as u64); | ||
} | ||
|
||
let keys: Vec<_> = hm.keys().cloned().collect(); | ||
let lookup: Vec<_> = black_box( | ||
(0..NUM_PRECOMPUTED_KEYS) | ||
.map(|_| keys.choose(&mut rng).unwrap().clone()) | ||
.collect(), | ||
); | ||
|
||
let start = std::time::Instant::now(); | ||
let mut sum = 0u64; | ||
for i in 0..iters as usize { | ||
if let Some(x) = hm.get(&lookup[i % NUM_PRECOMPUTED_KEYS]) { | ||
sum = sum.wrapping_add(*x); | ||
} | ||
} | ||
black_box(sum); | ||
start.elapsed() | ||
}); | ||
}); | ||
} | ||
|
||
fn profile_lookup_miss<S: BuildHasher + Default, D: Distribution>( | ||
hash_name: &str, | ||
mut distr: D, | ||
map_size: usize, | ||
c: &mut BenchmarkGroup<'_, WallTime>, | ||
) { | ||
let name = format!("lookupmiss-{}-{hash_name}", distr.name().to_lowercase()); | ||
let mut rng = StdRng::seed_from_u64(0x123456789abcdef); | ||
|
||
c.bench_function(&name, |b| { | ||
b.iter_custom(|iters| { | ||
let mut hm: HashMap<D::Value, u64, S> = HashMap::with_hasher(S::default()); | ||
for i in 0..map_size { | ||
hm.insert(distr.sample(&mut rng), i as u64); | ||
} | ||
|
||
let lookup: Vec<_> = black_box( | ||
(0..NUM_PRECOMPUTED_KEYS) | ||
.map(|_| distr.sample_missing(&mut rng)) | ||
.collect(), | ||
); | ||
|
||
let start = std::time::Instant::now(); | ||
let mut sum = 0u64; | ||
for i in 0..iters as usize { | ||
if let Some(x) = hm.get(&lookup[i % NUM_PRECOMPUTED_KEYS]) { | ||
sum = sum.wrapping_add(*x); | ||
} | ||
} | ||
black_box(sum); | ||
start.elapsed() | ||
}); | ||
}); | ||
} | ||
|
||
fn profile_set_build<S: BuildHasher + Default, D: Distribution>( | ||
hash_name: &str, | ||
mut distr: D, | ||
map_size: usize, | ||
c: &mut BenchmarkGroup<'_, WallTime>, | ||
) { | ||
let name = format!("setbuild-{}-{hash_name}", distr.name().to_lowercase()); | ||
let mut rng = StdRng::seed_from_u64(0x123456789abcdef); | ||
|
||
c.bench_function(&name, |b| { | ||
b.iter_custom(|iters| { | ||
// Repeat each key 10 times. | ||
let keys: Vec<_> = (0..map_size).map(|_| distr.sample(&mut rng)).collect(); | ||
let mut keys: Vec<_> = keys.iter().cycle().cloned().take(10 * map_size).collect(); | ||
keys.shuffle(&mut rng); | ||
let keys = black_box(keys); | ||
|
||
let start = std::time::Instant::now(); | ||
for _ in 0..iters as usize { | ||
// We intentionally do not pre-reserve so we observe re-hash | ||
// behavior. | ||
let mut set = HashSet::with_hasher(S::default()); | ||
for key in &keys { | ||
set.insert(key); | ||
} | ||
black_box(set); | ||
} | ||
start.elapsed() | ||
}); | ||
}); | ||
} | ||
|
||
#[rustfmt::skip] | ||
fn profile_distr<D: Distribution>(distr: D, map_size: usize, c: &mut Criterion) { | ||
let c = &mut c.benchmark_group(distr.name()); | ||
c.sampling_mode(criterion::SamplingMode::Flat); | ||
|
||
profile_hashonly::<foldhash::fast::RandomState, _>("foldhash-fast", distr.clone(), c); | ||
profile_hashonly::<foldhash::quality::RandomState, _>("foldhash-quality", distr.clone(), c); | ||
profile_hashonly::<fxhash::FxBuildHasher, _>("fxhash", distr.clone(), c); | ||
profile_hashonly::<ahash::RandomState, _>("ahash", distr.clone(), c); | ||
profile_hashonly::<std::hash::RandomState, _>("siphash", distr.clone(), c); | ||
|
||
profile_lookup_miss::<foldhash::fast::RandomState, _>("foldhash-fast", distr.clone(), map_size, c); | ||
profile_lookup_miss::<foldhash::quality::RandomState, _>("foldhash-quality", distr.clone(), map_size, c); | ||
profile_lookup_miss::<fxhash::FxBuildHasher, _>("fxhash", distr.clone(), map_size, c); | ||
profile_lookup_miss::<ahash::RandomState, _>("ahash", distr.clone(), map_size, c); | ||
profile_lookup_miss::<std::hash::RandomState, _>("siphash", distr.clone(), map_size, c); | ||
|
||
profile_lookup_hit::<foldhash::fast::RandomState, _>("foldhash-fast", distr.clone(), map_size, c); | ||
profile_lookup_hit::<foldhash::quality::RandomState, _>("foldhash-quality", distr.clone(), map_size, c); | ||
profile_lookup_hit::<fxhash::FxBuildHasher, _>("fxhash", distr.clone(), map_size, c); | ||
profile_lookup_hit::<ahash::RandomState, _>("ahash", distr.clone(), map_size, c); | ||
profile_lookup_hit::<std::hash::RandomState, _>("siphash", distr.clone(), map_size, c); | ||
|
||
profile_set_build::<foldhash::fast::RandomState, _>("foldhash-fast", distr.clone(), map_size, c); | ||
profile_set_build::<foldhash::quality::RandomState, _>("foldhash-quality", distr.clone(), map_size, c); | ||
profile_set_build::<fxhash::FxBuildHasher, _>("fxhash", distr.clone(), map_size, c); | ||
profile_set_build::<ahash::RandomState, _>("ahash", distr.clone(), map_size, c); | ||
profile_set_build::<std::hash::RandomState, _>("siphash", distr.clone(), map_size, c); | ||
} | ||
|
||
fn bench_hashes(c: &mut Criterion) { | ||
let map_size = 1000; | ||
profile_distr(distribution::U32, map_size, c); | ||
profile_distr(distribution::U64, map_size, c); | ||
profile_distr(distribution::U64LoBits, map_size, c); | ||
profile_distr(distribution::U64HiBits, map_size, c); | ||
profile_distr(distribution::U32Pair, map_size, c); | ||
profile_distr(distribution::U64Pair, map_size, c); | ||
profile_distr(distribution::Rgba, map_size, c); | ||
profile_distr(distribution::Ipv4, map_size, c); | ||
profile_distr(distribution::Ipv6, map_size, c); | ||
profile_distr(distribution::StrUuid, map_size, c); | ||
profile_distr(distribution::StrDate, map_size, c); | ||
profile_distr(distribution::AccessLog, map_size, c); | ||
profile_distr(distribution::StrWordList::english(), map_size, c); | ||
profile_distr(distribution::StrWordList::urls(), map_size, c); | ||
profile_distr(distribution::Kilobyte, map_size, c); | ||
profile_distr(distribution::TenKilobyte, map_size, c); | ||
} | ||
|
||
criterion_group!( | ||
name = benches; | ||
config = Criterion::default().measurement_time(Duration::from_secs(5)); | ||
targets = bench_hashes | ||
); | ||
criterion_main!(benches); |
Oops, something went wrong.