Skip to content

Commit 5ad7ee0

Browse files
committed
[CF] Log deletion in column families
Summary:
* Added a unit test that verifies that obsolete log files are deleted.
* Advance the log number for an empty column family when cutting a new log file.
* MinLogNumber() bug fix! (caught by the new unit test)
Test Plan: unit test
Reviewers: dhruba, haobo
CC: leveldb
Differential Revision: https://reviews.facebook.net/D16311
1 parent dc277f0 commit 5ad7ee0

File tree

3 files changed

+123
-7
lines changed

3 files changed

+123
-7
lines changed

db/column_family_test.cc

+107-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "rocksdb/env.h"
1212
#include "rocksdb/db.h"
1313
#include "util/testharness.h"
14+
#include "util/testutil.h"
1415
#include "utilities/merge_operators.h"
1516

1617
#include <algorithm>
@@ -21,9 +22,17 @@ namespace rocksdb {
2122

2223
using namespace std;
2324

25+
namespace {
26+
std::string RandomString(Random* rnd, int len) {
27+
std::string r;
28+
test::RandomString(rnd, len, &r);
29+
return r;
30+
}
31+
} // anonymous namespace
32+
2433
class ColumnFamilyTest {
2534
public:
26-
ColumnFamilyTest() {
35+
ColumnFamilyTest() : rnd_(139) {
2736
env_ = Env::Default();
2837
dbname_ = test::TmpDir() + "/column_family_test";
2938
db_options_.create_if_missing = true;
@@ -39,6 +48,10 @@ class ColumnFamilyTest {
3948
db_ = nullptr;
4049
}
4150

51+
Status Open() {
52+
return Open({"default"});
53+
}
54+
4255
Status Open(vector<string> cf) {
4356
vector<ColumnFamilyDescriptor> column_families;
4457
for (auto x : cf) {
@@ -48,6 +61,8 @@ class ColumnFamilyTest {
4861
return DB::Open(db_options_, dbname_, column_families, &handles_, &db_);
4962
}
5063

64+
DBImpl* dbfull() { return reinterpret_cast<DBImpl*>(db_); }
65+
5166
void Destroy() {
5267
for (auto h : handles_) {
5368
delete h;
@@ -75,6 +90,18 @@ class ColumnFamilyTest {
7590
}
7691
}
7792

93+
void PutRandomData(int cf, int bytes) {
94+
int num_insertions = (bytes + 99) / 100;
95+
for (int i = 0; i < num_insertions; ++i) {
96+
// 10 bytes key, 90 bytes value
97+
ASSERT_OK(Put(cf, test::RandomKey(&rnd_, 10), RandomString(&rnd_, 90)));
98+
}
99+
}
100+
101+
void WaitForFlush(int cf) {
102+
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf]));
103+
}
104+
78105
Status Put(int cf, const string& key, const string& value) {
79106
return db_->Put(WriteOptions(), handles_[cf], Slice(key), Slice(value));
80107
}
@@ -144,6 +171,18 @@ class ColumnFamilyTest {
144171
}
145172
}
146173

174+
int CountLiveLogFiles() {
175+
int ret = 0;
176+
VectorLogPtr wal_files;
177+
ASSERT_OK(db_->GetSortedWalFiles(wal_files));
178+
for (const auto& wal : wal_files) {
179+
if (wal->Type() == kAliveLogFile) {
180+
++ret;
181+
}
182+
}
183+
return ret;
184+
}
185+
147186
void CopyFile(const string& source, const string& destination,
148187
uint64_t size = 0) {
149188
const EnvOptions soptions;
@@ -174,6 +213,7 @@ class ColumnFamilyTest {
174213
string dbname_;
175214
DB* db_ = nullptr;
176215
Env* env_;
216+
Random rnd_;
177217
};
178218

179219
TEST(ColumnFamilyTest, AddDrop) {
@@ -355,6 +395,72 @@ TEST(ColumnFamilyTest, FlushTest) {
355395
Close();
356396
}
357397

398+
// Makes sure that obsolete log files get deleted
399+
TEST(ColumnFamilyTest, LogDeletionTest) {
400+
column_family_options_.write_buffer_size = 100000; // 100KB
401+
ASSERT_OK(Open());
402+
CreateColumnFamilies({"one", "two", "three", "four"});
403+
// Each bracket is one log file; the numbers inside are the column families with data in it. If a number is in (), it means
404+
// we don't need it anymore (it's been flushed)
405+
// []
406+
ASSERT_EQ(CountLiveLogFiles(), 0);
407+
PutRandomData(0, 100);
408+
// [0]
409+
PutRandomData(1, 100);
410+
// [0, 1]
411+
PutRandomData(1, 100000);
412+
WaitForFlush(1);
413+
// [0, (1)] [1]
414+
ASSERT_EQ(CountLiveLogFiles(), 2);
415+
PutRandomData(0, 100);
416+
// [0, (1)] [0, 1]
417+
ASSERT_EQ(CountLiveLogFiles(), 2);
418+
PutRandomData(2, 100);
419+
// [0, (1)] [0, 1, 2]
420+
PutRandomData(2, 100000);
421+
WaitForFlush(2);
422+
// [0, (1)] [0, 1, (2)] [2]
423+
ASSERT_EQ(CountLiveLogFiles(), 3);
424+
PutRandomData(2, 100000);
425+
WaitForFlush(2);
426+
// [0, (1)] [0, 1, (2)] [(2)] [2]
427+
ASSERT_EQ(CountLiveLogFiles(), 4);
428+
PutRandomData(3, 100);
429+
// [0, (1)] [0, 1, (2)] [(2)] [2, 3]
430+
PutRandomData(1, 100);
431+
// [0, (1)] [0, 1, (2)] [(2)] [1, 2, 3]
432+
ASSERT_EQ(CountLiveLogFiles(), 4);
433+
PutRandomData(1, 100000);
434+
WaitForFlush(1);
435+
// [0, (1)] [0, (1), (2)] [(2)] [(1), 2, 3] [1]
436+
ASSERT_EQ(CountLiveLogFiles(), 5);
437+
PutRandomData(0, 100000);
438+
WaitForFlush(0);
439+
// [(0), (1)] [(0), (1), (2)] [(2)] [(1), 2, 3] [1, (0)] [0]
440+
// delete obsolete logs -->
441+
// [(1), 2, 3] [1, (0)] [0]
442+
ASSERT_EQ(CountLiveLogFiles(), 3);
443+
PutRandomData(0, 100000);
444+
WaitForFlush(0);
445+
// [(1), 2, 3] [1, (0)] [(0)] [0]
446+
ASSERT_EQ(CountLiveLogFiles(), 4);
447+
PutRandomData(1, 100000);
448+
WaitForFlush(1);
449+
// [(1), 2, 3] [(1), (0)] [(0)] [0, (1)] [1]
450+
ASSERT_EQ(CountLiveLogFiles(), 5);
451+
PutRandomData(2, 100000);
452+
WaitForFlush(2);
453+
// [(1), (2), 3] [(1), (0)] [(0)] [0, (1)] [1, (2)] [2]
454+
ASSERT_EQ(CountLiveLogFiles(), 6);
455+
PutRandomData(3, 100000);
456+
WaitForFlush(3);
457+
// [(1), (2), (3)] [(1), (0)] [(0)] [0, (1)] [1, (2)] [2, (3)] [3]
458+
// delete obsolete logs -->
459+
// [0, (1)] [1, (2)] [2, (3)] [3]
460+
ASSERT_EQ(CountLiveLogFiles(), 4);
461+
Close();
462+
}
463+
358464
} // namespace rocksdb
359465

360466
int main(int argc, char** argv) {

db/db_impl.cc

+13-4
Original file line numberDiff line numberDiff line change
@@ -1112,7 +1112,8 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
11121112
mutex_.Unlock();
11131113
std::vector<Iterator*> memtables;
11141114
for (MemTable* m : mems) {
1115-
Log(options_.info_log, "Flushing memtable with next log file: %lu\n",
1115+
Log(options_.info_log,
1116+
"[CF %u] Flushing memtable with next log file: %lu\n", cfd->GetID(),
11161117
(unsigned long)m->GetNextLogNumber());
11171118
memtables.push_back(m->NewIterator());
11181119
}
@@ -3578,20 +3579,28 @@ Status DBImpl::MakeRoomForWrite(ColumnFamilyData* cfd, bool force) {
35783579
if (!s.ok()) {
35793580
// Avoid chewing through file number space in a tight loop.
35803581
versions_->ReuseFileNumber(new_log_number);
3581-
assert (!new_mem);
3582+
assert(!new_mem);
35823583
break;
35833584
}
35843585
logfile_number_ = new_log_number;
35853586
log_.reset(new log::Writer(std::move(lfile)));
35863587
cfd->mem()->SetNextLogNumber(logfile_number_);
3587-
// TODO also update log number for all column families with empty
3588-
// memtables (i.e. don't have data in the old log)
35893588
cfd->imm()->Add(cfd->mem());
35903589
if (force) {
35913590
cfd->imm()->FlushRequested();
35923591
}
35933592
new_mem->Ref();
35943593
alive_log_files_.push_back(logfile_number_);
3594+
for (auto cfd : *versions_->GetColumnFamilySet()) {
3595+
// All of this is just an optimization to delete logs that
3596+
// are no longer needed -- if CF is empty, that means it
3597+
// doesn't need that particular log to stay alive, so we just
3598+
// advance the log number. There is no need to persist this in the manifest.
3599+
if (cfd->mem()->GetFirstSequenceNumber() == 0 &&
3600+
cfd->imm()->size() == 0) {
3601+
cfd->SetLogNumber(logfile_number_);
3602+
}
3603+
}
35953604
cfd->SetMemtable(new_mem);
35963605
Log(options_.info_log, "New memtable created with log file: #%lu\n",
35973606
(unsigned long)logfile_number_);

db/version_set.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <vector>
2525
#include <deque>
2626
#include <atomic>
27+
#include <limits>
2728
#include "db/dbformat.h"
2829
#include "db/version_edit.h"
2930
#include "port/port.h"
@@ -359,9 +360,9 @@ class VersionSet {
359360
// Returns the minimum log number such that all
360361
// log numbers less than or equal to it can be deleted
361362
uint64_t MinLogNumber() const {
362-
uint64_t min_log_num = 0;
363+
uint64_t min_log_num = std::numeric_limits<uint64_t>::max();
363364
for (auto cfd : *column_family_set_) {
364-
if (min_log_num == 0 || min_log_num > cfd->GetLogNumber()) {
365+
if (min_log_num > cfd->GetLogNumber()) {
365366
min_log_num = cfd->GetLogNumber();
366367
}
367368
}

0 commit comments

Comments
 (0)