@@ -60,6 +60,9 @@ CuckooTableBuilder::CuckooTableBuilder(
60
60
hash_table_size_(use_module_hash ? 0 : 2 ),
61
61
is_last_level_file_(false ),
62
62
has_seen_first_key_(false ),
63
+ key_size_(0 ),
64
+ value_size_(0 ),
65
+ num_entries_(0 ),
63
66
ucomp_(user_comparator),
64
67
use_module_hash_(use_module_hash),
65
68
identity_as_first_hash_(identity_as_first_hash),
@@ -72,7 +75,7 @@ CuckooTableBuilder::CuckooTableBuilder(
72
75
}
73
76
74
77
void CuckooTableBuilder::Add (const Slice& key, const Slice& value) {
75
- if (kvs_. size () >= kMaxVectorIdx - 1 ) {
78
+ if (num_entries_ >= kMaxVectorIdx - 1 ) {
76
79
status_ = Status::NotSupported (" Number of keys in a file must be < 2^32-1" );
77
80
return ;
78
81
}
@@ -90,15 +93,18 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
90
93
has_seen_first_key_ = true ;
91
94
smallest_user_key_.assign (ikey.user_key .data (), ikey.user_key .size ());
92
95
largest_user_key_.assign (ikey.user_key .data (), ikey.user_key .size ());
96
+ key_size_ = is_last_level_file_ ? ikey.user_key .size () : key.size ();
97
+ value_size_ = value.size ();
93
98
}
94
99
// Even if one sequence number is non-zero, then it is not last level.
95
100
assert (!is_last_level_file_ || ikey.sequence == 0 );
96
101
if (is_last_level_file_) {
97
- kvs_.emplace_back (std::make_pair (
98
- ikey.user_key .ToString (), value.ToString ()));
102
+ kvs_.append (ikey.user_key .data (), ikey.user_key .size ());
99
103
} else {
100
- kvs_.emplace_back ( std::make_pair ( key.ToString (), value. ToString () ));
104
+ kvs_.append ( key.data (), key. size ( ));
101
105
}
106
+ kvs_.append (value.data (), value.size ());
107
+ ++num_entries_;
102
108
103
109
// In order to fill the empty buckets in the hash table, we identify a
104
110
// key which is not used so far (unused_user_key). We determine this by
@@ -111,21 +117,32 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
111
117
largest_user_key_.assign (ikey.user_key .data (), ikey.user_key .size ());
112
118
}
113
119
if (!use_module_hash_) {
114
- if (hash_table_size_ < kvs_. size () / max_hash_table_ratio_) {
120
+ if (hash_table_size_ < num_entries_ / max_hash_table_ratio_) {
115
121
hash_table_size_ *= 2 ;
116
122
}
117
123
}
118
124
}
119
125
126
+ Slice CuckooTableBuilder::GetKey (uint64_t idx) const {
127
+ return Slice (&kvs_[idx * (key_size_ + value_size_)], key_size_);
128
+ }
129
+
130
+ Slice CuckooTableBuilder::GetUserKey (uint64_t idx) const {
131
+ return is_last_level_file_ ? GetKey (idx) : ExtractUserKey (GetKey (idx));
132
+ }
133
+
134
+ Slice CuckooTableBuilder::GetValue (uint64_t idx) const {
135
+ return Slice (&kvs_[idx * (key_size_ + value_size_) + key_size_], value_size_);
136
+ }
137
+
120
138
Status CuckooTableBuilder::MakeHashTable (std::vector<CuckooBucket>* buckets) {
121
139
buckets->resize (hash_table_size_ + cuckoo_block_size_ - 1 );
122
140
uint64_t make_space_for_key_call_id = 0 ;
123
- for (uint32_t vector_idx = 0 ; vector_idx < kvs_. size () ; vector_idx++) {
141
+ for (uint32_t vector_idx = 0 ; vector_idx < num_entries_ ; vector_idx++) {
124
142
uint64_t bucket_id;
125
143
bool bucket_found = false ;
126
144
autovector<uint64_t > hash_vals;
127
- Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first :
128
- ExtractUserKey (kvs_[vector_idx].first );
145
+ Slice user_key = GetUserKey (vector_idx);
129
146
for (uint32_t hash_cnt = 0 ; hash_cnt < num_hash_func_ && !bucket_found;
130
147
++hash_cnt) {
131
148
uint64_t hash_val = CuckooHash (user_key, hash_cnt, use_module_hash_,
@@ -140,10 +157,8 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
140
157
bucket_found = true ;
141
158
break ;
142
159
} else {
143
- if (ucomp_->Compare (user_key, is_last_level_file_
144
- ? Slice (kvs_[(*buckets)[hash_val].vector_idx ].first )
145
- : ExtractUserKey (
146
- kvs_[(*buckets)[hash_val].vector_idx ].first )) == 0 ) {
160
+ if (ucomp_->Compare (user_key,
161
+ GetUserKey ((*buckets)[hash_val].vector_idx )) == 0 ) {
147
162
return Status::NotSupported (" Same key is being inserted again." );
148
163
}
149
164
hash_vals.push_back (hash_val);
@@ -183,10 +198,10 @@ Status CuckooTableBuilder::Finish() {
183
198
std::vector<CuckooBucket> buckets;
184
199
Status s;
185
200
std::string unused_bucket;
186
- if (!kvs_. empty () ) {
201
+ if (num_entries_ > 0 ) {
187
202
// Calculate the real hash size if module hash is enabled.
188
203
if (use_module_hash_) {
189
- hash_table_size_ = kvs_. size () / max_hash_table_ratio_;
204
+ hash_table_size_ = num_entries_ / max_hash_table_ratio_;
190
205
}
191
206
s = MakeHashTable (&buckets);
192
207
if (!s.ok ()) {
@@ -224,14 +239,13 @@ Status CuckooTableBuilder::Finish() {
224
239
AppendInternalKey (&unused_bucket, ikey);
225
240
}
226
241
}
227
- properties_.num_entries = kvs_.size ();
228
- properties_.fixed_key_len = unused_bucket.size ();
229
- uint32_t value_length = kvs_.empty () ? 0 : kvs_[0 ].second .size ();
230
- uint32_t bucket_size = value_length + properties_.fixed_key_len ;
242
+ properties_.num_entries = num_entries_;
243
+ properties_.fixed_key_len = key_size_;
231
244
properties_.user_collected_properties [
232
245
CuckooTablePropertyNames::kValueLength ].assign (
233
- reinterpret_cast <const char *>(&value_length ), sizeof (value_length ));
246
+ reinterpret_cast <const char *>(&value_size_ ), sizeof (value_size_ ));
234
247
248
+ uint64_t bucket_size = key_size_ + value_size_;
235
249
unused_bucket.resize (bucket_size, ' a' );
236
250
// Write the table.
237
251
uint32_t num_added = 0 ;
@@ -240,9 +254,9 @@ Status CuckooTableBuilder::Finish() {
240
254
s = file_->Append (Slice (unused_bucket));
241
255
} else {
242
256
++num_added;
243
- s = file_->Append (kvs_[ bucket.vector_idx ]. first );
257
+ s = file_->Append (GetKey ( bucket.vector_idx ) );
244
258
if (s.ok ()) {
245
- s = file_->Append (kvs_[ bucket.vector_idx ]. second );
259
+ s = file_->Append (GetValue ( bucket.vector_idx ) );
246
260
}
247
261
}
248
262
if (!s.ok ()) {
@@ -251,7 +265,7 @@ Status CuckooTableBuilder::Finish() {
251
265
}
252
266
assert (num_added == NumEntries ());
253
267
properties_.raw_key_size = num_added * properties_.fixed_key_len ;
254
- properties_.raw_value_size = num_added * value_length ;
268
+ properties_.raw_value_size = num_added * value_size_ ;
255
269
256
270
uint64_t offset = buckets.size () * bucket_size;
257
271
properties_.data_size = offset;
@@ -330,31 +344,29 @@ void CuckooTableBuilder::Abandon() {
330
344
}
331
345
332
346
uint64_t CuckooTableBuilder::NumEntries () const {
333
- return kvs_. size () ;
347
+ return num_entries_ ;
334
348
}
335
349
336
350
uint64_t CuckooTableBuilder::FileSize () const {
337
351
if (closed_) {
338
352
return file_->GetFileSize ();
339
- } else if (kvs_. size () == 0 ) {
353
+ } else if (num_entries_ == 0 ) {
340
354
return 0 ;
341
355
}
342
356
343
357
if (use_module_hash_) {
344
- return (kvs_[0 ].first .size () + kvs_[0 ].second .size ()) * kvs_.size () /
345
- max_hash_table_ratio_;
358
+ return (key_size_ + value_size_) * num_entries_ / max_hash_table_ratio_;
346
359
} else {
347
360
// Account for buckets being a power of two.
348
361
// As elements are added, file size remains constant for a while and
349
362
// doubles its size. Since compaction algorithm stops adding elements
350
363
// only after it exceeds the file limit, we account for the extra element
351
364
// being added here.
352
365
uint64_t expected_hash_table_size = hash_table_size_;
353
- if (expected_hash_table_size < (kvs_. size () + 1 ) / max_hash_table_ratio_) {
366
+ if (expected_hash_table_size < (num_entries_ + 1 ) / max_hash_table_ratio_) {
354
367
expected_hash_table_size *= 2 ;
355
368
}
356
- return (kvs_[0 ].first .size () + kvs_[0 ].second .size ()) *
357
- expected_hash_table_size - 1 ;
369
+ return (key_size_ + value_size_) * expected_hash_table_size - 1 ;
358
370
}
359
371
}
360
372
@@ -390,7 +402,7 @@ bool CuckooTableBuilder::MakeSpaceForKey(
390
402
// of the method. We store this number into the nodes that we explore in
391
403
// current method call.
392
404
// It is unlikely for the increment operation to overflow because the maximum
393
- // no. of times this will be called is <= max_num_hash_func_ + kvs_.size() .
405
+ // no. of times this will be called is <= max_num_hash_func_ + num_entries_ .
394
406
for (uint32_t hash_cnt = 0 ; hash_cnt < num_hash_func_; ++hash_cnt) {
395
407
uint64_t bucket_id = hash_vals[hash_cnt];
396
408
(*buckets)[bucket_id].make_space_for_key_call_id =
@@ -408,9 +420,7 @@ bool CuckooTableBuilder::MakeSpaceForKey(
408
420
CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id ];
409
421
for (uint32_t hash_cnt = 0 ;
410
422
hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) {
411
- uint64_t child_bucket_id = CuckooHash (
412
- (is_last_level_file_ ? kvs_[curr_bucket.vector_idx ].first :
413
- ExtractUserKey (Slice (kvs_[curr_bucket.vector_idx ].first ))),
423
+ uint64_t child_bucket_id = CuckooHash (GetUserKey (curr_bucket.vector_idx ),
414
424
hash_cnt, use_module_hash_, hash_table_size_, identity_as_first_hash_,
415
425
get_slice_hash_);
416
426
// Iterate inside Cuckoo Block.
0 commit comments