Skip to content

Commit 7f71448

Browse files
committed
Implementing a cache friendly version of Cuckoo Hash
Summary: This implements a cache friendly version of Cuckoo Hash in which, in case of collision, we try to insert in next few locations. The size of the neighborhood to check is taken as an input parameter in builder and stored in the table. Test Plan: make check all cuckoo_table_{db,reader,builder}_test Reviewers: sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D22455
1 parent d977e55 commit 7f71448

9 files changed

+308
-139
lines changed

include/rocksdb/table.h

+34-3
Original file line numberDiff line numberDiff line change
@@ -227,15 +227,46 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
227227
PlainTableOptions());
228228

229229
struct CuckooTablePropertyNames {
230+
// The key that is used to fill empty buckets.
230231
static const std::string kEmptyKey;
232+
// Fixed length of value.
231233
static const std::string kValueLength;
232-
static const std::string kNumHashTable;
233-
static const std::string kMaxNumBuckets;
234+
// Number of hash functions used in Cuckoo Hash.
235+
static const std::string kNumHashFunc;
236+
// It denotes the number of buckets in a Cuckoo Block. Given a key and a
237+
// particular hash function, a Cuckoo Block is a set of consecutive buckets,
238+
// where starting bucket id is given by the hash function on the key. In case
239+
// of a collision during inserting the key, the builder tries to insert the
240+
// key in other locations of the cuckoo block before using the next hash
241+
// function. This reduces cache miss during read operation in case of
242+
// collision.
243+
static const std::string kCuckooBlockSize;
244+
// Size of the hash table. Use this number to compute the modulo of hash
245+
// function. The actual number of buckets will be kHashTableSize +
246+
// kCuckooBlockSize - 1. The last kCuckooBlockSize-1 buckets are used to
247+
// accommodate the Cuckoo Block from end of hash table, due to cache friendly
248+
// implementation.
249+
static const std::string kHashTableSize;
250+
// Denotes if the key sorted in the file is Internal Key (if false)
251+
// or User Key only (if true).
234252
static const std::string kIsLastLevel;
235253
};
236254

255+
// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing
256+
// @hash_table_ratio: Determines the utilization of hash tables. Smaller values
257+
// result in larger hash tables with fewer collisions.
258+
// @max_search_depth: A property used by builder to determine the depth to go
259+
// to search for a path to displace elements in case of
260+
// collision. See Builder.MakeSpaceForKey method. Higher
261+
// values result in more efficient hash tables with fewer
262+
// lookups but take more time to build.
263+
// @cuckoo_block_size: In case of collision while inserting, the builder
264+
// attempts to insert in the next cuckoo_block_size
265+
// locations before skipping over to the next Cuckoo hash
266+
// function. This makes lookups more cache friendly in case
267+
// of collisions.
237268
extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9,
238-
uint32_t max_search_depth = 100);
269+
uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5);
239270

240271
#endif // ROCKSDB_LITE
241272

table/cuckoo_table_builder.cc

+82-57
Original file line numberDiff line numberDiff line change
@@ -24,28 +24,31 @@
2424
namespace rocksdb {
2525
const std::string CuckooTablePropertyNames::kEmptyKey =
2626
"rocksdb.cuckoo.bucket.empty.key";
27-
const std::string CuckooTablePropertyNames::kNumHashTable =
27+
const std::string CuckooTablePropertyNames::kNumHashFunc =
2828
"rocksdb.cuckoo.hash.num";
29-
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
30-
"rocksdb.cuckoo.bucket.maxnum";
29+
const std::string CuckooTablePropertyNames::kHashTableSize =
30+
"rocksdb.cuckoo.hash.size";
3131
const std::string CuckooTablePropertyNames::kValueLength =
3232
"rocksdb.cuckoo.value.length";
3333
const std::string CuckooTablePropertyNames::kIsLastLevel =
3434
"rocksdb.cuckoo.file.islastlevel";
35+
const std::string CuckooTablePropertyNames::kCuckooBlockSize =
36+
"rocksdb.cuckoo.hash.cuckooblocksize";
3537

3638
// Obtained by running echo rocksdb.table.cuckoo | sha1sum
3739
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
3840

3941
CuckooTableBuilder::CuckooTableBuilder(
4042
WritableFile* file, double hash_table_ratio,
4143
uint32_t max_num_hash_table, uint32_t max_search_depth,
42-
const Comparator* user_comparator,
44+
const Comparator* user_comparator, uint32_t cuckoo_block_size,
4345
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
44-
: num_hash_table_(2),
46+
: num_hash_func_(2),
4547
file_(file),
4648
hash_table_ratio_(hash_table_ratio),
47-
max_num_hash_table_(max_num_hash_table),
49+
max_num_hash_func_(max_num_hash_table),
4850
max_search_depth_(max_search_depth),
51+
cuckoo_block_size_(std::max(1U, cuckoo_block_size)),
4952
is_last_level_file_(false),
5053
has_seen_first_key_(false),
5154
ucomp_(user_comparator),
@@ -101,48 +104,58 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
101104
}
102105

103106
Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
104-
uint64_t num_buckets = kvs_.size() / hash_table_ratio_;
105-
buckets->resize(num_buckets);
107+
uint64_t hash_table_size = kvs_.size() / hash_table_ratio_;
108+
buckets->resize(hash_table_size + cuckoo_block_size_ - 1);
106109
uint64_t make_space_for_key_call_id = 0;
107110
for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) {
108111
uint64_t bucket_id;
109112
bool bucket_found = false;
110113
autovector<uint64_t> hash_vals;
111114
Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first :
112115
ExtractUserKey(kvs_[vector_idx].first);
113-
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
114-
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets);
115-
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
116-
bucket_id = hash_val;
117-
bucket_found = true;
118-
break;
119-
} else {
120-
if (ucomp_->Compare(user_key, is_last_level_file_
121-
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
122-
: ExtractUserKey(
123-
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
124-
return Status::NotSupported("Same key is being inserted again.");
116+
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found;
117+
++hash_cnt) {
118+
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, hash_table_size);
119+
// If there is a collision, check next cuckoo_block_size_ locations for
120+
// empty locations. While checking, if we reach end of the hash table,
121+
// stop searching and proceed for next hash function.
122+
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
123+
++block_idx, ++hash_val) {
124+
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
125+
bucket_id = hash_val;
126+
bucket_found = true;
127+
break;
128+
} else {
129+
if (ucomp_->Compare(user_key, is_last_level_file_
130+
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
131+
: ExtractUserKey(
132+
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
133+
return Status::NotSupported("Same key is being inserted again.");
134+
}
135+
hash_vals.push_back(hash_val);
125136
}
126-
hash_vals.push_back(hash_val);
127137
}
128138
}
129139
while (!bucket_found && !MakeSpaceForKey(hash_vals,
130-
++make_space_for_key_call_id, buckets, &bucket_id)) {
140+
hash_table_size, ++make_space_for_key_call_id, buckets, &bucket_id)) {
131141
// Rehash by increasing number of hash tables.
132-
if (num_hash_table_ >= max_num_hash_table_) {
133-
return Status::NotSupported("Too many collissions. Unable to hash.");
142+
if (num_hash_func_ >= max_num_hash_func_) {
143+
return Status::NotSupported("Too many collisions. Unable to hash.");
134144
}
135145
// We don't really need to rehash the entire table because old hashes are
136146
// still valid and we only increased the number of hash functions.
137147
uint64_t hash_val = get_slice_hash_(user_key,
138-
num_hash_table_, num_buckets);
139-
++num_hash_table_;
140-
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
141-
bucket_found = true;
142-
bucket_id = hash_val;
143-
break;
144-
} else {
145-
hash_vals.push_back(hash_val);
148+
num_hash_func_, hash_table_size);
149+
++num_hash_func_;
150+
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
151+
++block_idx, ++hash_val) {
152+
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
153+
bucket_found = true;
154+
bucket_id = hash_val;
155+
break;
156+
} else {
157+
hash_vals.push_back(hash_val);
158+
}
146159
}
147160
}
148161
(*buckets)[bucket_id].vector_idx = vector_idx;
@@ -226,16 +239,22 @@ Status CuckooTableBuilder::Finish() {
226239
properties_.user_collected_properties[
227240
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
228241
properties_.user_collected_properties[
229-
CuckooTablePropertyNames::kNumHashTable].assign(
230-
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
231-
uint64_t num_buckets = buckets.size();
242+
CuckooTablePropertyNames::kNumHashFunc].assign(
243+
reinterpret_cast<char*>(&num_hash_func_), sizeof(num_hash_func_));
244+
245+
uint64_t hash_table_size = buckets.size() - cuckoo_block_size_ + 1;
232246
properties_.user_collected_properties[
233-
CuckooTablePropertyNames::kMaxNumBuckets].assign(
234-
reinterpret_cast<const char*>(&num_buckets), sizeof(num_buckets));
247+
CuckooTablePropertyNames::kHashTableSize].assign(
248+
reinterpret_cast<const char*>(&hash_table_size),
249+
sizeof(hash_table_size));
235250
properties_.user_collected_properties[
236251
CuckooTablePropertyNames::kIsLastLevel].assign(
237252
reinterpret_cast<const char*>(&is_last_level_file_),
238253
sizeof(is_last_level_file_));
254+
properties_.user_collected_properties[
255+
CuckooTablePropertyNames::kCuckooBlockSize].assign(
256+
reinterpret_cast<const char*>(&cuckoo_block_size_),
257+
sizeof(cuckoo_block_size_));
239258

240259
// Write meta blocks.
241260
MetaIndexBuilder meta_index_builder;
@@ -307,6 +326,7 @@ uint64_t CuckooTableBuilder::FileSize() const {
307326
// If tree depth exceeds max depth, we return false indicating failure.
308327
bool CuckooTableBuilder::MakeSpaceForKey(
309328
const autovector<uint64_t>& hash_vals,
329+
const uint64_t hash_table_size,
310330
const uint64_t make_space_for_key_call_id,
311331
std::vector<CuckooBucket>* buckets,
312332
uint64_t* bucket_id) {
@@ -322,12 +342,13 @@ bool CuckooTableBuilder::MakeSpaceForKey(
322342
std::vector<CuckooNode> tree;
323343
// We want to identify already visited buckets in the current method call so
324344
// that we don't add same buckets again for exploration in the tree.
325-
// We do this by maintaining a count of current method call, which acts as a
326-
// unique id for this invocation of the method. We store this number into
327-
// the nodes that we explore in current method call.
345+
// We do this by maintaining a count of current method call in
346+
// make_space_for_key_call_id, which acts as a unique id for this invocation
347+
// of the method. We store this number into the nodes that we explore in
348+
// current method call.
328349
// It is unlikely for the increment operation to overflow because the maximum
329-
// no. of times this will be called is <= max_num_hash_table_ + kvs_.size().
330-
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
350+
// no. of times this will be called is <= max_num_hash_func_ + kvs_.size().
351+
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
331352
uint64_t bucket_id = hash_vals[hash_cnt];
332353
(*buckets)[bucket_id].make_space_for_key_call_id =
333354
make_space_for_key_call_id;
@@ -342,22 +363,26 @@ bool CuckooTableBuilder::MakeSpaceForKey(
342363
break;
343364
}
344365
CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id];
345-
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
366+
for (uint32_t hash_cnt = 0;
367+
hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) {
346368
uint64_t child_bucket_id = get_slice_hash_(
347369
is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first
348370
: ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)),
349-
hash_cnt, buckets->size());
350-
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
351-
make_space_for_key_call_id) {
352-
continue;
353-
}
354-
(*buckets)[child_bucket_id].make_space_for_key_call_id =
355-
make_space_for_key_call_id;
356-
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
357-
curr_pos));
358-
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
359-
null_found = true;
360-
break;
371+
hash_cnt, hash_table_size);
372+
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
373+
++block_idx, ++child_bucket_id) {
374+
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
375+
make_space_for_key_call_id) {
376+
continue;
377+
}
378+
(*buckets)[child_bucket_id].make_space_for_key_call_id =
379+
make_space_for_key_call_id;
380+
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
381+
curr_pos));
382+
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
383+
null_found = true;
384+
break;
385+
}
361386
}
362387
}
363388
++curr_pos;
@@ -367,10 +392,10 @@ bool CuckooTableBuilder::MakeSpaceForKey(
367392
// There is an empty node in tree.back(). Now, traverse the path from this
368393
// empty node to top of the tree and at every node in the path, replace
369394
// child with the parent. Stop when first level is reached in the tree
370-
// (happens when 0 <= bucket_to_replace_pos < num_hash_table_) and return
395+
// (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return
371396
// this location in first level for target key to be inserted.
372397
uint32_t bucket_to_replace_pos = tree.size()-1;
373-
while (bucket_to_replace_pos >= num_hash_table_) {
398+
while (bucket_to_replace_pos >= num_hash_func_) {
374399
CuckooNode& curr_node = tree[bucket_to_replace_pos];
375400
(*buckets)[curr_node.bucket_id] =
376401
(*buckets)[tree[curr_node.parent_pos].bucket_id];

table/cuckoo_table_builder.h

+6-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class CuckooTableBuilder: public TableBuilder {
2323
CuckooTableBuilder(
2424
WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table,
2525
uint32_t max_search_depth, const Comparator* user_comparator,
26+
uint32_t cuckoo_block_size,
2627
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
2728

2829
// REQUIRES: Either Finish() or Abandon() has been called.
@@ -60,24 +61,26 @@ class CuckooTableBuilder: public TableBuilder {
6061
CuckooBucket()
6162
: vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
6263
uint32_t vector_idx;
63-
// This number will not exceed kvs_.size() + max_num_hash_table_.
64+
// This number will not exceed kvs_.size() + max_num_hash_func_.
6465
// We assume number of items is <= 2^32.
6566
uint32_t make_space_for_key_call_id;
6667
};
6768
static const uint32_t kMaxVectorIdx = std::numeric_limits<int32_t>::max();
6869

6970
bool MakeSpaceForKey(
7071
const autovector<uint64_t>& hash_vals,
72+
const uint64_t hash_table_size,
7173
const uint64_t call_id,
7274
std::vector<CuckooBucket>* buckets,
7375
uint64_t* bucket_id);
7476
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
7577

76-
uint32_t num_hash_table_;
78+
uint32_t num_hash_func_;
7779
WritableFile* file_;
7880
const double hash_table_ratio_;
79-
const uint32_t max_num_hash_table_;
81+
const uint32_t max_num_hash_func_;
8082
const uint32_t max_search_depth_;
83+
const uint32_t cuckoo_block_size_;
8184
bool is_last_level_file_;
8285
Status status_;
8386
std::vector<std::pair<std::string, std::string>> kvs_;

0 commit comments

Comments
 (0)