Skip to content

Commit 5cd0576

Browse files
committed
Fix compaction bug in Cuckoo Table Builder. Use kvs_.size() instead of num_entries in FileSize() method.
Summary: Fix compaction bug in Cuckoo Table Builder. Use kvs_.size() instead of num_entries in FileSize() method. Also added tests. Test Plan: make check all Also ran db_bench to generate multiple files. Reviewers: sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D22743
1 parent 0fbb3fa commit 5cd0576

File tree

3 files changed

+60
-12
lines changed

3 files changed

+60
-12
lines changed

db/cuckoo_table_db_test.cc

+25-1
Original file line numberDiff line numberDiff line change
@@ -245,14 +245,38 @@ TEST(CuckooTableDBTest, CompactionTrigger) {
245245
ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + idx)));
246246
}
247247
dbfull()->TEST_WaitForFlushMemTable();
248-
dbfull()->TEST_CompactRange(0, nullptr, nullptr);
248+
ASSERT_EQ("2", FilesPerLevel());
249249

250+
dbfull()->TEST_CompactRange(0, nullptr, nullptr);
250251
ASSERT_EQ("0,2", FilesPerLevel());
251252
for (int idx = 0; idx < 22; ++idx) {
252253
ASSERT_EQ(std::string(10000, 'a' + idx), Get(Key(idx)));
253254
}
254255
}
255256

257+
TEST(CuckooTableDBTest, CompactionIntoMultipleFiles) {
258+
// Create a big L0 file and check it compacts into multiple files in L1.
259+
Options options = CurrentOptions();
260+
options.write_buffer_size = 270 << 10;
261+
// Two SST files should be created, each containing 14 keys.
262+
// Number of buckets will be 16. Total size ~156 KB.
263+
options.target_file_size_base = 160 << 10;
264+
Reopen(&options);
265+
266+
// Write 28 values, each 10016 B ~ 10KB
267+
for (int idx = 0; idx < 28; ++idx) {
268+
ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + idx)));
269+
}
270+
dbfull()->TEST_WaitForFlushMemTable();
271+
ASSERT_EQ("1", FilesPerLevel());
272+
273+
dbfull()->TEST_CompactRange(0, nullptr, nullptr);
274+
ASSERT_EQ("0,2", FilesPerLevel());
275+
for (int idx = 0; idx < 28; ++idx) {
276+
ASSERT_EQ(std::string(10000, 'a' + idx), Get(Key(idx)));
277+
}
278+
}
279+
256280
TEST(CuckooTableDBTest, SameKeyInsertedInTwoDifferentFilesAndCompacted) {
257281
// Insert same key twice so that they go to different SST files. Then wait for
258282
// compaction and check if the latest value is stored and old value removed.

table/cuckoo_table_builder.cc

+2-3
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,14 @@ CuckooTableBuilder::CuckooTableBuilder(
5656
ucomp_(user_comparator),
5757
get_slice_hash_(get_slice_hash),
5858
closed_(false) {
59-
properties_.num_entries = 0;
6059
// Data is in a huge block.
6160
properties_.num_data_blocks = 1;
6261
properties_.index_size = 0;
6362
properties_.filter_size = 0;
6463
}
6564

6665
void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
67-
if (properties_.num_entries >= kMaxVectorIdx - 1) {
66+
if (kvs_.size() >= kMaxVectorIdx - 1) {
6867
status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1");
6968
return;
7069
}
@@ -311,7 +310,7 @@ uint64_t CuckooTableBuilder::NumEntries() const {
311310
uint64_t CuckooTableBuilder::FileSize() const {
312311
if (closed_) {
313312
return file_->GetFileSize();
314-
} else if (properties_.num_entries == 0) {
313+
} else if (kvs_.size() == 0) {
315314
return 0;
316315
}
317316

table/cuckoo_table_builder_test.cc

+33-8
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
135135
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
136136
4, 100, BytewiseComparator(), 1, GetSliceHash);
137137
ASSERT_OK(builder.status());
138+
ASSERT_EQ(0UL, builder.FileSize());
138139
ASSERT_OK(builder.Finish());
139140
ASSERT_OK(writable_file->Close());
140141
CheckFileContents({}, {}, {}, "", 0, 2, false);
@@ -155,6 +156,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
155156
for (auto& user_key : user_keys) {
156157
keys.push_back(GetInternalKey(user_key, false));
157158
}
159+
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
158160

159161
unique_ptr<WritableFile> writable_file;
160162
fname = test::TmpDir() + "/NoCollisionFullKey";
@@ -167,10 +169,12 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
167169
ASSERT_EQ(builder.NumEntries(), i + 1);
168170
ASSERT_OK(builder.status());
169171
}
172+
uint32_t bucket_size = keys[0].size() + values[0].size();
173+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
170174
ASSERT_OK(builder.Finish());
171175
ASSERT_OK(writable_file->Close());
176+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
172177

173-
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
174178
std::string expected_unused_bucket = GetInternalKey("key00", true);
175179
expected_unused_bucket += std::string(values[0].size(), 'a');
176180
CheckFileContents(keys, values, expected_locations,
@@ -192,6 +196,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
192196
for (auto& user_key : user_keys) {
193197
keys.push_back(GetInternalKey(user_key, false));
194198
}
199+
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
195200

196201
unique_ptr<WritableFile> writable_file;
197202
fname = test::TmpDir() + "/WithCollisionFullKey";
@@ -204,10 +209,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
204209
ASSERT_EQ(builder.NumEntries(), i + 1);
205210
ASSERT_OK(builder.status());
206211
}
212+
uint32_t bucket_size = keys[0].size() + values[0].size();
213+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
207214
ASSERT_OK(builder.Finish());
208215
ASSERT_OK(writable_file->Close());
216+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
209217

210-
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
211218
std::string expected_unused_bucket = GetInternalKey("key00", true);
212219
expected_unused_bucket += std::string(values[0].size(), 'a');
213220
CheckFileContents(keys, values, expected_locations,
@@ -229,6 +236,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
229236
for (auto& user_key : user_keys) {
230237
keys.push_back(GetInternalKey(user_key, false));
231238
}
239+
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
232240

233241
unique_ptr<WritableFile> writable_file;
234242
uint32_t cuckoo_block_size = 2;
@@ -242,10 +250,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
242250
ASSERT_EQ(builder.NumEntries(), i + 1);
243251
ASSERT_OK(builder.status());
244252
}
253+
uint32_t bucket_size = keys[0].size() + values[0].size();
254+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
245255
ASSERT_OK(builder.Finish());
246256
ASSERT_OK(writable_file->Close());
257+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
247258

248-
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
249259
std::string expected_unused_bucket = GetInternalKey("key00", true);
250260
expected_unused_bucket += std::string(values[0].size(), 'a');
251261
CheckFileContents(keys, values, expected_locations,
@@ -272,6 +282,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
272282
for (auto& user_key : user_keys) {
273283
keys.push_back(GetInternalKey(user_key, false));
274284
}
285+
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
275286

276287
unique_ptr<WritableFile> writable_file;
277288
fname = test::TmpDir() + "/WithCollisionPathFullKey";
@@ -284,10 +295,12 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
284295
ASSERT_EQ(builder.NumEntries(), i + 1);
285296
ASSERT_OK(builder.status());
286297
}
298+
uint32_t bucket_size = keys[0].size() + values[0].size();
299+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
287300
ASSERT_OK(builder.Finish());
288301
ASSERT_OK(writable_file->Close());
302+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
289303

290-
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
291304
std::string expected_unused_bucket = GetInternalKey("key00", true);
292305
expected_unused_bucket += std::string(values[0].size(), 'a');
293306
CheckFileContents(keys, values, expected_locations,
@@ -311,6 +324,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
311324
for (auto& user_key : user_keys) {
312325
keys.push_back(GetInternalKey(user_key, false));
313326
}
327+
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
314328

315329
unique_ptr<WritableFile> writable_file;
316330
fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
@@ -323,10 +337,12 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
323337
ASSERT_EQ(builder.NumEntries(), i + 1);
324338
ASSERT_OK(builder.status());
325339
}
340+
uint32_t bucket_size = keys[0].size() + values[0].size();
341+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
326342
ASSERT_OK(builder.Finish());
327343
ASSERT_OK(writable_file->Close());
344+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
328345

329-
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
330346
std::string expected_unused_bucket = GetInternalKey("key00", true);
331347
expected_unused_bucket += std::string(values[0].size(), 'a');
332348
CheckFileContents(keys, values, expected_locations,
@@ -344,6 +360,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
344360
{user_keys[3], {3, 4, 5, 6}}
345361
};
346362
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
363+
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
347364

348365
unique_ptr<WritableFile> writable_file;
349366
fname = test::TmpDir() + "/NoCollisionUserKey";
@@ -356,10 +373,12 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
356373
ASSERT_EQ(builder.NumEntries(), i + 1);
357374
ASSERT_OK(builder.status());
358375
}
376+
uint32_t bucket_size = user_keys[0].size() + values[0].size();
377+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
359378
ASSERT_OK(builder.Finish());
360379
ASSERT_OK(writable_file->Close());
380+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
361381

362-
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
363382
std::string expected_unused_bucket = "key00";
364383
expected_unused_bucket += std::string(values[0].size(), 'a');
365384
CheckFileContents(user_keys, values, expected_locations,
@@ -377,6 +396,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
377396
{user_keys[3], {0, 1, 2, 3}},
378397
};
379398
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
399+
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
380400

381401
unique_ptr<WritableFile> writable_file;
382402
fname = test::TmpDir() + "/WithCollisionUserKey";
@@ -389,10 +409,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
389409
ASSERT_EQ(builder.NumEntries(), i + 1);
390410
ASSERT_OK(builder.status());
391411
}
412+
uint32_t bucket_size = user_keys[0].size() + values[0].size();
413+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
392414
ASSERT_OK(builder.Finish());
393415
ASSERT_OK(writable_file->Close());
416+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
394417

395-
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
396418
std::string expected_unused_bucket = "key00";
397419
expected_unused_bucket += std::string(values[0].size(), 'a');
398420
CheckFileContents(user_keys, values, expected_locations,
@@ -412,6 +434,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
412434
{user_keys[4], {0, 2}},
413435
};
414436
std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
437+
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
415438

416439
unique_ptr<WritableFile> writable_file;
417440
fname = test::TmpDir() + "/WithCollisionPathUserKey";
@@ -424,10 +447,12 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
424447
ASSERT_EQ(builder.NumEntries(), i + 1);
425448
ASSERT_OK(builder.status());
426449
}
450+
uint32_t bucket_size = user_keys[0].size() + values[0].size();
451+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
427452
ASSERT_OK(builder.Finish());
428453
ASSERT_OK(writable_file->Close());
454+
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
429455

430-
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
431456
std::string expected_unused_bucket = "key00";
432457
expected_unused_bucket += std::string(values[0].size(), 'a');
433458
CheckFileContents(user_keys, values, expected_locations,

0 commit comments

Comments
 (0)