Skip to content

Commit 2dc6f62

Browse files
author
Lei Jin
committed
handle kDelete type in cuckoo builder
Summary: when I changed std::vector<std::string, std::string> to std::string to store key/value pairs in builder, I missed the handling for kDeletion type. As a result, value_size_ can be wrong if the first add key is for deletion. The is captured by ./cuckoo_table_db_test Test Plan: ./cuckoo_table_db_test ./cuckoo_table_reader_test ./cuckoo_table_builder_test Reviewers: sdong, yhchiang, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D24045
1 parent 389edb6 commit 2dc6f62

File tree

2 files changed

+59
-7
lines changed

2 files changed

+59
-7
lines changed

table/cuckoo_table_builder.cc

+53-6
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,11 @@ CuckooTableBuilder::CuckooTableBuilder(
6060
hash_table_size_(use_module_hash ? 0 : 2),
6161
is_last_level_file_(false),
6262
has_seen_first_key_(false),
63+
has_seen_first_value_(false),
6364
key_size_(0),
6465
value_size_(0),
6566
num_entries_(0),
67+
num_values_(0),
6668
ucomp_(user_comparator),
6769
use_module_hash_(use_module_hash),
6870
identity_as_first_hash_(identity_as_first_hash),
@@ -84,6 +86,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
8486
status_ = Status::Corruption("Unable to parse key into inernal key.");
8587
return;
8688
}
89+
if (ikey.type != kTypeDeletion && ikey.type != kTypeValue) {
90+
status_ = Status::NotSupported("Unsupported key type " +
91+
std::to_string(ikey.type));
92+
return;
93+
}
94+
8795
// Determine if we can ignore the sequence number and value type from
8896
// internal keys by looking at sequence number from first key. We assume
8997
// that if first key has a zero sequence number, then all the remaining
@@ -94,16 +102,38 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
94102
smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
95103
largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
96104
key_size_ = is_last_level_file_ ? ikey.user_key.size() : key.size();
97-
value_size_ = value.size();
105+
}
106+
if (key_size_ != (is_last_level_file_ ? ikey.user_key.size() : key.size())) {
107+
status_ = Status::NotSupported("all keys have to be the same size");
108+
return;
98109
}
99110
// Even if one sequence number is non-zero, then it is not last level.
100111
assert(!is_last_level_file_ || ikey.sequence == 0);
101-
if (is_last_level_file_) {
102-
kvs_.append(ikey.user_key.data(), ikey.user_key.size());
112+
113+
if (ikey.type == kTypeValue) {
114+
if (!has_seen_first_value_) {
115+
has_seen_first_value_ = true;
116+
value_size_ = value.size();
117+
}
118+
if (value_size_ != value.size()) {
119+
status_ = Status::NotSupported("all values have to be the same size");
120+
return;
121+
}
122+
123+
if (is_last_level_file_) {
124+
kvs_.append(ikey.user_key.data(), ikey.user_key.size());
125+
} else {
126+
kvs_.append(key.data(), key.size());
127+
}
128+
kvs_.append(value.data(), value.size());
129+
++num_values_;
103130
} else {
104-
kvs_.append(key.data(), key.size());
131+
if (is_last_level_file_) {
132+
deleted_keys_.append(ikey.user_key.data(), ikey.user_key.size());
133+
} else {
134+
deleted_keys_.append(key.data(), key.size());
135+
}
105136
}
106-
kvs_.append(value.data(), value.size());
107137
++num_entries_;
108138

109139
// In order to fill the empty buckets in the hash table, we identify a
@@ -123,15 +153,30 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
123153
}
124154
}
125155

156+
bool CuckooTableBuilder::IsDeletedKey(uint64_t idx) const {
157+
assert(closed_);
158+
return idx >= num_values_;
159+
}
160+
126161
Slice CuckooTableBuilder::GetKey(uint64_t idx) const {
162+
assert(closed_);
163+
if (IsDeletedKey(idx)) {
164+
return Slice(&deleted_keys_[(idx - num_values_) * key_size_], key_size_);
165+
}
127166
return Slice(&kvs_[idx * (key_size_ + value_size_)], key_size_);
128167
}
129168

130169
Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const {
170+
assert(closed_);
131171
return is_last_level_file_ ? GetKey(idx) : ExtractUserKey(GetKey(idx));
132172
}
133173

134174
Slice CuckooTableBuilder::GetValue(uint64_t idx) const {
175+
assert(closed_);
176+
if (IsDeletedKey(idx)) {
177+
static std::string empty_value(value_size_, 'a');
178+
return Slice(empty_value);
179+
}
135180
return Slice(&kvs_[idx * (key_size_ + value_size_) + key_size_], value_size_);
136181
}
137182

@@ -256,7 +301,9 @@ Status CuckooTableBuilder::Finish() {
256301
++num_added;
257302
s = file_->Append(GetKey(bucket.vector_idx));
258303
if (s.ok()) {
259-
s = file_->Append(GetValue(bucket.vector_idx));
304+
if (value_size_ > 0) {
305+
s = file_->Append(GetValue(bucket.vector_idx));
306+
}
260307
}
261308
}
262309
if (!s.ok()) {

table/cuckoo_table_builder.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class CuckooTableBuilder: public TableBuilder {
7575
uint64_t* bucket_id);
7676
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
7777

78+
inline bool IsDeletedKey(uint64_t idx) const;
7879
inline Slice GetKey(uint64_t idx) const;
7980
inline Slice GetUserKey(uint64_t idx) const;
8081
inline Slice GetValue(uint64_t idx) const;
@@ -88,14 +89,18 @@ class CuckooTableBuilder: public TableBuilder {
8889
uint64_t hash_table_size_;
8990
bool is_last_level_file_;
9091
bool has_seen_first_key_;
92+
bool has_seen_first_value_;
9193
uint64_t key_size_;
9294
uint64_t value_size_;
9395
// A list of fixed-size key-value pairs concatenating into a string.
9496
// Use GetKey(), GetUserKey(), and GetValue() to retrieve a specific
9597
// key / value given an index
9698
std::string kvs_;
97-
// Number of key-value pairs stored in kvs_
99+
std::string deleted_keys_;
100+
// Number of key-value pairs stored in kvs_ + number of deleted keys
98101
uint64_t num_entries_;
102+
// Number of keys that contain value (non-deletion op)
103+
uint64_t num_values_;
99104
Status status_;
100105
TableProperties properties_;
101106
const Comparator* ucomp_;

0 commit comments

Comments
 (0)