-
-
Notifications
You must be signed in to change notification settings - Fork 721
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add term hashmap benchmark
* refactor arena hashmap:
  - add inlines
  - remove the occupied array and use table_entry.is_empty instead (saves 4 bytes per entry)
  - raise the saturation threshold from 1/3 to 1/2 to reduce memory use
  - use u32 for UnorderedId (we have the 4 billion limit anyway on the Columnar stuff)
  - fix naming of LinearProbing
  - remove byteorder dependency
  - memory consumption went down from 2 GB to 1.8 GB when indexing the wikipedia dataset in tantivy
* Update stacker/src/arena_hashmap.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>
---------
Co-authored-by: Paul Masurel <paul@quickwit.io>
- Loading branch information
1 parent
780e263
commit e83abbf
Showing
8 changed files
with
195 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#![allow(dead_code)] | ||
extern crate criterion; | ||
|
||
use criterion::*; | ||
use rand::SeedableRng; | ||
use tantivy_stacker::ArenaHashMap; | ||
|
||
const ALICE: &str = include_str!("../../benches/alice.txt"); | ||
|
||
fn bench_hashmap_throughput(c: &mut Criterion) { | ||
let plot_config = PlotConfiguration::default().summary_scale(AxisScale::Linear); | ||
|
||
let mut group = c.benchmark_group("CreateHashMap"); | ||
group.plot_config(plot_config); | ||
|
||
let input_name = "alice"; | ||
let input_bytes = ALICE.len() as u64; | ||
group.throughput(Throughput::Bytes(input_bytes)); | ||
|
||
group.bench_with_input( | ||
BenchmarkId::new(input_name.to_string(), input_bytes), | ||
&ALICE, | ||
|b, i| b.iter(|| create_hash_map(i.split_whitespace().map(|el| el.as_bytes()))), | ||
); | ||
// numbers | ||
let input_bytes = 1_000_000 * 8 as u64; | ||
group.throughput(Throughput::Bytes(input_bytes)); | ||
|
||
group.bench_with_input( | ||
BenchmarkId::new("numbers".to_string(), input_bytes), | ||
&(0..1_000_000u64), | ||
|b, i| b.iter(|| create_hash_map(i.clone().map(|el| el.to_le_bytes()))), | ||
); | ||
|
||
// numbers zipf | ||
use rand::distributions::Distribution; | ||
use rand::rngs::StdRng; | ||
let mut rng = StdRng::from_seed([3u8; 32]); | ||
let zipf = zipf::ZipfDistribution::new(10_000, 1.03).unwrap(); | ||
|
||
let input_bytes = 1_000_000 * 8 as u64; | ||
group.throughput(Throughput::Bytes(input_bytes)); | ||
|
||
group.bench_with_input( | ||
BenchmarkId::new("numbers_zipf".to_string(), input_bytes), | ||
&(0..1_000_000u64), | ||
|b, i| b.iter(|| create_hash_map(i.clone().map(|_el| zipf.sample(&mut rng).to_le_bytes()))), | ||
); | ||
|
||
group.finish(); | ||
} | ||
|
||
fn create_hash_map<'a, T: AsRef<[u8]>>(terms: impl Iterator<Item = T>) -> ArenaHashMap { | ||
let mut map = ArenaHashMap::with_capacity(4); | ||
for term in terms { | ||
map.mutate_or_create(term.as_ref(), |val| { | ||
if let Some(mut val) = val { | ||
val += 1; | ||
val | ||
} else { | ||
1u64 | ||
} | ||
}); | ||
} | ||
|
||
map | ||
} | ||
|
||
criterion_group!(block_benches, bench_hashmap_throughput,); | ||
criterion_main!(block_benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
use tantivy_stacker::ArenaHashMap; | ||
|
||
const ALICE: &str = include_str!("../../benches/alice.txt"); | ||
|
||
fn main() { | ||
create_hash_map((0..100_000_000).map(|el| el.to_string())); | ||
|
||
for _ in 0..1000 { | ||
create_hash_map(ALICE.split_whitespace()); | ||
} | ||
} | ||
|
||
fn create_hash_map<'a, T: AsRef<str>>(terms: impl Iterator<Item = T>) -> ArenaHashMap { | ||
let mut map = ArenaHashMap::with_capacity(4); | ||
for term in terms { | ||
map.mutate_or_create(term.as_ref().as_bytes(), |val| { | ||
if let Some(mut val) = val { | ||
val += 1; | ||
val | ||
} else { | ||
1u64 | ||
} | ||
}); | ||
} | ||
|
||
map | ||
} |
Oops, something went wrong.