Skip to content

Commit

Permalink
Release 0.1.0.
Browse files Browse the repository at this point in the history
  • Loading branch information
orlp committed Aug 12, 2024
0 parents commit 92a8df9
Show file tree
Hide file tree
Showing 19 changed files with 21,674 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/target
/Cargo.lock
/bench_results
/out
.DS_Store
43 changes: 43 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
[package]
name = "foldhash"
version = "0.1.0"
authors = ["Orson Peters <orsonpeters@gmail.com>"]
license = "Zlib"
repository = "https://github.com/orlp/foldhash"
readme = "README.md"
keywords = ["hash", "hasher", "no-std"]
categories = ["algorithms", "no-std"]
description = "A fast, non-cryptographic, minimally DoS-resistant hashing algorithm."
edition = "2021"
exclude = ["benches", "tools", "assets"]

[features]
default = ["std"]
std = []

[dependencies]

[dev-dependencies]
criterion = "0.5"
hashbrown = "0.14"
uuid = "1.8"
rand = "0.8"
ahash = "0.8"
fxhash = "0.2"
chrono = "0.4"

[lib]
bench = false

[[bench]]
name = "bench"
path = "benches/bench.rs"
harness = false

[[bench]]
name = "avalanche"
path = "benches/avalanche.rs"
harness = false

[profile.release]
lto = "thin"
19 changes: 19 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Copyright (c) 2024 Orson Peters

This software is provided 'as-is', without any express or implied warranty. In
no event will the authors be held liable for any damages arising from the use of
this software.

Permission is granted to anyone to use this software for any purpose, including
commercial applications, and to alter it and redistribute it freely, subject to
the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim
that you wrote the original software. If you use this software in a product,
an acknowledgment in the product documentation would be appreciated but is
not required.

2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.

3. This notice may not be removed or altered from any source distribution.
277 changes: 277 additions & 0 deletions README.md

Large diffs are not rendered by default.

Binary file added assets/avalanche-foldhash-fast.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/avalanche-foldhash-quality.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/avalanche-fxhash.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/avalanche-siphash.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
58 changes: 58 additions & 0 deletions benches/avalanche.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use std::hash::BuildHasher;

use rand::prelude::*;

/// Estimates the worst-case avalanche bias of a family of `u64` hashers.
///
/// `new_hasher` is called `num_hashers` times. For each hasher,
/// `iters_per_hasher` random `u64` inputs are hashed, and for every input each
/// of its 64 bits is flipped in turn to see which of the 64 output bits change.
///
/// Returns a vector of `64 * 64` flip fractions. The entry at index
/// `output_bit * 64 + input_bit` is the fraction of trials in which flipping
/// `input_bit` flipped `output_bit`, taken from whichever hasher deviated the
/// most from the ideal value of `0.5`. Entries start out at `0.5`.
fn compute_u64_avalanche<H: BuildHasher, F: FnMut() -> H>(
    num_hashers: usize,
    iters_per_hasher: usize,
    mut new_hasher: F,
) -> Vec<f64> {
    const BITS: usize = 64;

    let mut rng = thread_rng();
    let mut worst_bias = vec![0.5f64; BITS * BITS];
    for _ in 0..num_hashers {
        let hasher = new_hasher();

        // flip_counts[out_bit * BITS + in_bit] counts how often toggling
        // in_bit of the input toggled out_bit of the hash.
        let mut flip_counts = vec![0usize; BITS * BITS];
        for _ in 0..iters_per_hasher {
            let input: u64 = rng.gen();
            let reference = hasher.hash_one(input);
            for in_bit in 0..BITS {
                let diff = reference ^ hasher.hash_one(input ^ (1u64 << in_bit));
                for out_bit in 0..BITS {
                    flip_counts[out_bit * BITS + in_bit] += ((diff >> out_bit) & 1) as usize;
                }
            }
        }

        // Keep, per cell, the fraction that is furthest away from 0.5.
        for (worst, &count) in worst_bias.iter_mut().zip(&flip_counts) {
            let fraction = count as f64 / iters_per_hasher as f64;
            if (fraction - 0.5).abs() > (*worst - 0.5).abs() {
                *worst = fraction;
            }
        }
    }

    worst_bias
}

/// Computes the avalanche matrix for the hashers produced by `new_hasher` and
/// stores it as `out/avalanche-{name}.csv`.
///
/// The measurement uses 10000 hashers with 1000 random inputs each. The file
/// holds the resulting values on a single line, separated by commas.
///
/// # Panics
///
/// Panics if the `out` directory cannot be created or the file cannot be
/// written.
fn write_avalanche_csv<H: BuildHasher, F: FnMut() -> H>(name: &str, new_hasher: F) {
    println!("calculating avalanche properties of {name}");

    let biases = compute_u64_avalanche(10000, 1000, new_hasher);
    let cells: Vec<String> = biases.iter().map(|bias| bias.to_string()).collect();

    std::fs::create_dir_all("out").unwrap();
    let csv_line = cells.join(",");
    std::fs::write(format!("out/avalanche-{name}.csv"), csv_line).unwrap();
}

/// Writes one avalanche CSV per hash implementation under comparison.
///
/// Each hasher factory is the type's `Default::default`, so every call yields
/// a freshly constructed hasher state.
fn main() {
    write_avalanche_csv("foldhash-fast", foldhash::fast::RandomState::default);
    write_avalanche_csv("foldhash-quality", foldhash::quality::RandomState::default);
    write_avalanche_csv("siphash", std::hash::RandomState::default);
    write_avalanche_csv("ahash", ahash::RandomState::default);
    write_avalanche_csv("fxhash", fxhash::FxBuildHasher::default);
}
198 changes: 198 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
use criterion::measurement::WallTime;
use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion};
use rand::prelude::*;
use std::collections::{HashMap, HashSet};
use std::hash::BuildHasher;
use std::hint::black_box;
use std::time::Duration;

/// Number of keys sampled up front for the hash-only and lookup benchmarks.
/// Their timed loops walk this pool cyclically (`i % NUM_PRECOMPUTED_KEYS`),
/// so no key is generated while the clock is running.
const NUM_PRECOMPUTED_KEYS: usize = 1024;

mod distribution;
use distribution::Distribution;

/// Benchmarks raw hashing throughput of `S` on keys drawn from `distr`.
///
/// The benchmark is registered as `hashonly-{distribution}-{hash_name}`, with
/// the distribution name lowercased. For every measurement a pool of
/// `NUM_PRECOMPUTED_KEYS` keys is sampled before the clock starts; the timed
/// loop then hashes keys from that pool `iters` times, wrapping around.
fn profile_hashonly<S: BuildHasher + Default, D: Distribution>(
    hash_name: &str,
    mut distr: D,
    c: &mut BenchmarkGroup<'_, WallTime>,
) {
    let distr_name = distr.name().to_lowercase();
    let bench_id = format!("hashonly-{distr_name}-{hash_name}");
    let mut rng = StdRng::seed_from_u64(0x123456789abcdef);

    // A single hasher state is shared by all measurements of this benchmark.
    let build_hasher = S::default();

    c.bench_function(&bench_id, |bencher| {
        bencher.iter_custom(|iters| {
            // Key generation is kept out of the timed region.
            let mut pool = Vec::with_capacity(NUM_PRECOMPUTED_KEYS);
            for _ in 0..NUM_PRECOMPUTED_KEYS {
                pool.push(distr.sample(&mut rng));
            }
            let pool = black_box(pool);

            let timer = std::time::Instant::now();
            for i in 0..iters as usize {
                black_box(build_hasher.hash_one(&pool[i % NUM_PRECOMPUTED_KEYS]));
            }
            timer.elapsed()
        });
    });
}

/// Benchmarks successful `HashMap` lookups using hasher `S`.
///
/// The benchmark is registered as `lookuphit-{distribution}-{hash_name}`, with
/// the distribution name lowercased. For every measurement a map is filled
/// with `map_size` sampled keys, and `NUM_PRECOMPUTED_KEYS` probe keys are
/// picked at random from the keys actually stored. Only the lookups are timed.
///
/// # Panics
///
/// Panics if the map ends up empty (e.g. `map_size == 0`), since no probe key
/// can be chosen.
fn profile_lookup_hit<S: BuildHasher + Default, D: Distribution>(
    hash_name: &str,
    mut distr: D,
    map_size: usize,
    c: &mut BenchmarkGroup<'_, WallTime>,
) {
    let distr_name = distr.name().to_lowercase();
    let bench_id = format!("lookuphit-{distr_name}-{hash_name}");
    let mut rng = StdRng::seed_from_u64(0x123456789abcdef);

    c.bench_function(&bench_id, |bencher| {
        bencher.iter_custom(|iters| {
            let mut table: HashMap<D::Value, u64, S> = HashMap::with_hasher(S::default());
            for idx in 0..map_size {
                table.insert(distr.sample(&mut rng), idx as u64);
            }

            // Probe only with keys that are present in the table.
            let present: Vec<_> = table.keys().cloned().collect();
            let mut probes = Vec::with_capacity(NUM_PRECOMPUTED_KEYS);
            for _ in 0..NUM_PRECOMPUTED_KEYS {
                probes.push(present.choose(&mut rng).unwrap().clone());
            }
            let probes = black_box(probes);

            let timer = std::time::Instant::now();
            // Accumulate the found values so the lookups cannot be optimized out.
            let mut checksum = 0u64;
            for i in 0..iters as usize {
                if let Some(value) = table.get(&probes[i % NUM_PRECOMPUTED_KEYS]) {
                    checksum = checksum.wrapping_add(*value);
                }
            }
            black_box(checksum);
            timer.elapsed()
        });
    });
}

/// Benchmarks `HashMap` lookups with probe keys from `sample_missing`, using
/// hasher `S`.
///
/// The benchmark is registered as `lookupmiss-{distribution}-{hash_name}`,
/// with the distribution name lowercased. For every measurement a map is
/// filled with `map_size` keys from `distr.sample`, while the
/// `NUM_PRECOMPUTED_KEYS` probe keys come from `distr.sample_missing`
/// (presumably keys that `sample` never yields; verify against the
/// `Distribution` trait). Only the lookups are timed.
fn profile_lookup_miss<S: BuildHasher + Default, D: Distribution>(
    hash_name: &str,
    mut distr: D,
    map_size: usize,
    c: &mut BenchmarkGroup<'_, WallTime>,
) {
    let distr_name = distr.name().to_lowercase();
    let bench_id = format!("lookupmiss-{distr_name}-{hash_name}");
    let mut rng = StdRng::seed_from_u64(0x123456789abcdef);

    c.bench_function(&bench_id, |bencher| {
        bencher.iter_custom(|iters| {
            let mut table: HashMap<D::Value, u64, S> = HashMap::with_hasher(S::default());
            for idx in 0..map_size {
                table.insert(distr.sample(&mut rng), idx as u64);
            }

            let mut probes = Vec::with_capacity(NUM_PRECOMPUTED_KEYS);
            for _ in 0..NUM_PRECOMPUTED_KEYS {
                probes.push(distr.sample_missing(&mut rng));
            }
            let probes = black_box(probes);

            let timer = std::time::Instant::now();
            // Accumulate any found values so the lookups cannot be optimized out.
            let mut checksum = 0u64;
            for i in 0..iters as usize {
                if let Some(value) = table.get(&probes[i % NUM_PRECOMPUTED_KEYS]) {
                    checksum = checksum.wrapping_add(*value);
                }
            }
            black_box(checksum);
            timer.elapsed()
        });
    });
}

/// Benchmarks building a `HashSet` with hasher `S` from a key stream that
/// contains duplicates.
///
/// The benchmark is registered as `setbuild-{distribution}-{hash_name}`, with
/// the distribution name lowercased. For every measurement `map_size` keys are
/// sampled, each is repeated 10 times, and the resulting sequence is shuffled.
/// The timed loop builds a fresh set of key references from that sequence
/// `iters` times.
fn profile_set_build<S: BuildHasher + Default, D: Distribution>(
    hash_name: &str,
    mut distr: D,
    map_size: usize,
    c: &mut BenchmarkGroup<'_, WallTime>,
) {
    let distr_name = distr.name().to_lowercase();
    let bench_id = format!("setbuild-{distr_name}-{hash_name}");
    let mut rng = StdRng::seed_from_u64(0x123456789abcdef);

    c.bench_function(&bench_id, |bencher| {
        bencher.iter_custom(|iters| {
            // Ten back-to-back copies of the sampled keys, shuffled afterwards.
            let unique: Vec<_> = (0..map_size).map(|_| distr.sample(&mut rng)).collect();
            let mut keys = Vec::with_capacity(10 * map_size);
            for _ in 0..10 {
                keys.extend_from_slice(&unique);
            }
            keys.shuffle(&mut rng);
            let keys = black_box(keys);

            let timer = std::time::Instant::now();
            for _ in 0..iters as usize {
                // The set deliberately starts without reserved capacity so
                // that re-hashing during growth is part of the measurement.
                let mut set = HashSet::with_hasher(S::default());
                for key in &keys {
                    set.insert(key);
                }
                black_box(set);
            }
            timer.elapsed()
        });
    });
}

#[rustfmt::skip]
fn profile_distr<D: Distribution>(distr: D, map_size: usize, c: &mut Criterion) {
let c = &mut c.benchmark_group(distr.name());
c.sampling_mode(criterion::SamplingMode::Flat);

profile_hashonly::<foldhash::fast::RandomState, _>("foldhash-fast", distr.clone(), c);
profile_hashonly::<foldhash::quality::RandomState, _>("foldhash-quality", distr.clone(), c);
profile_hashonly::<fxhash::FxBuildHasher, _>("fxhash", distr.clone(), c);
profile_hashonly::<ahash::RandomState, _>("ahash", distr.clone(), c);
profile_hashonly::<std::hash::RandomState, _>("siphash", distr.clone(), c);

profile_lookup_miss::<foldhash::fast::RandomState, _>("foldhash-fast", distr.clone(), map_size, c);
profile_lookup_miss::<foldhash::quality::RandomState, _>("foldhash-quality", distr.clone(), map_size, c);
profile_lookup_miss::<fxhash::FxBuildHasher, _>("fxhash", distr.clone(), map_size, c);
profile_lookup_miss::<ahash::RandomState, _>("ahash", distr.clone(), map_size, c);
profile_lookup_miss::<std::hash::RandomState, _>("siphash", distr.clone(), map_size, c);

profile_lookup_hit::<foldhash::fast::RandomState, _>("foldhash-fast", distr.clone(), map_size, c);
profile_lookup_hit::<foldhash::quality::RandomState, _>("foldhash-quality", distr.clone(), map_size, c);
profile_lookup_hit::<fxhash::FxBuildHasher, _>("fxhash", distr.clone(), map_size, c);
profile_lookup_hit::<ahash::RandomState, _>("ahash", distr.clone(), map_size, c);
profile_lookup_hit::<std::hash::RandomState, _>("siphash", distr.clone(), map_size, c);

profile_set_build::<foldhash::fast::RandomState, _>("foldhash-fast", distr.clone(), map_size, c);
profile_set_build::<foldhash::quality::RandomState, _>("foldhash-quality", distr.clone(), map_size, c);
profile_set_build::<fxhash::FxBuildHasher, _>("fxhash", distr.clone(), map_size, c);
profile_set_build::<ahash::RandomState, _>("ahash", distr.clone(), map_size, c);
profile_set_build::<std::hash::RandomState, _>("siphash", distr.clone(), map_size, c);
}

fn bench_hashes(c: &mut Criterion) {
let map_size = 1000;
profile_distr(distribution::U32, map_size, c);
profile_distr(distribution::U64, map_size, c);
profile_distr(distribution::U64LoBits, map_size, c);
profile_distr(distribution::U64HiBits, map_size, c);
profile_distr(distribution::U32Pair, map_size, c);
profile_distr(distribution::U64Pair, map_size, c);
profile_distr(distribution::Rgba, map_size, c);
profile_distr(distribution::Ipv4, map_size, c);
profile_distr(distribution::Ipv6, map_size, c);
profile_distr(distribution::StrUuid, map_size, c);
profile_distr(distribution::StrDate, map_size, c);
profile_distr(distribution::AccessLog, map_size, c);
profile_distr(distribution::StrWordList::english(), map_size, c);
profile_distr(distribution::StrWordList::urls(), map_size, c);
profile_distr(distribution::Kilobyte, map_size, c);
profile_distr(distribution::TenKilobyte, map_size, c);
}

// Declares the `benches` group: `bench_hashes` is its only target and each
// benchmark gets a 5 second measurement time instead of criterion's default.
criterion_group!(
name = benches;
config = Criterion::default().measurement_time(Duration::from_secs(5));
targets = bench_hashes
);
// Hands the `benches` group to criterion's runner macro.
criterion_main!(benches);
Loading

0 comments on commit 92a8df9

Please sign in to comment.