Skip to content

Commit 9625acb

Browse files
committed
[CF] Dont reuse dropped column family IDs
Summary: Column family IDs should be unique, even if column family is dropped. To achieve this, we save max column family in manifest. Note that the diff is still not ready. I'm only using differential to move the patch to my Mac machine. Test Plan: added a test to column_family_test Reviewers: dhruba, haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D16581
1 parent e21d5b8 commit 9625acb

7 files changed

+94
-10
lines changed

db/column_family.cc

+6
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,12 @@ uint32_t ColumnFamilySet::GetNextColumnFamilyID() {
399399
return ++max_column_family_;
400400
}
401401

402+
uint32_t ColumnFamilySet::GetMaxColumnFamily() { return max_column_family_; }
403+
404+
void ColumnFamilySet::UpdateMaxColumnFamily(uint32_t new_max_column_family) {
405+
max_column_family_ = std::max(new_max_column_family, max_column_family_);
406+
}
407+
402408
// under a DB mutex
403409
ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
404410
const std::string& name, uint32_t id, Version* dummy_versions,

db/column_family.h

+6-5
Original file line numberDiff line numberDiff line change
@@ -290,11 +290,11 @@ class ColumnFamilySet {
290290
uint32_t GetID(const std::string& name);
291291
// this call will return the next available column family ID. it guarantees
292292
// that there is no column family with id greater than or equal to the
293-
// returned value in the current running instance. It does not, however,
294-
// guarantee that the returned ID is unique accross RocksDB restarts.
295-
// For example, if a client adds a column family 6 and then drops it,
296-
// after a restart, we might reuse column family 6 ID.
293+
// returned value in the current running instance or anytime in RocksDB
294+
// instance history.
297295
uint32_t GetNextColumnFamilyID();
296+
uint32_t GetMaxColumnFamily();
297+
void UpdateMaxColumnFamily(uint32_t new_max_column_family);
298298

299299
ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id,
300300
Version* dummy_version,
@@ -314,7 +314,8 @@ class ColumnFamilySet {
314314
// family might get dropped when you release the DB mutex.
315315
// * GetDefault(), GetColumnFamily(), Exists(), GetID() -- either inside of DB
316316
// mutex or call Lock()
317-
// * GetNextColumnFamilyID() -- inside of DB mutex
317+
// * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily() --
318+
// inside of DB mutex
318319
void Lock();
319320
void Unlock();
320321

db/column_family_test.cc

+26
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,32 @@ class ColumnFamilyTest {
284284
Random rnd_;
285285
};
286286

287+
TEST(ColumnFamilyTest, DontReuseColumnFamilyID) {
288+
for (int iter = 0; iter < 3; ++iter) {
289+
Open();
290+
CreateColumnFamilies({"one", "two", "three"});
291+
for (size_t i = 0; i < handles_.size(); ++i) {
292+
ASSERT_EQ(i, handles_[i]->GetID());
293+
}
294+
if (iter == 1) {
295+
Reopen();
296+
}
297+
DropColumnFamilies({3});
298+
Reopen();
299+
if (iter == 2) {
300+
// this tests if max_column_family is correctly persisted with
301+
// WriteSnapshot()
302+
Reopen();
303+
}
304+
CreateColumnFamilies({"three2"});
305+
// ID 3 that was used for dropped column family "three" should not be reused
306+
ASSERT_EQ(4, handles_[3]->GetID());
307+
Close();
308+
Destroy();
309+
}
310+
}
311+
312+
287313
TEST(ColumnFamilyTest, AddDrop) {
288314
Open();
289315
CreateColumnFamilies({"one", "two", "three"});

db/version_edit.cc

+15
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ enum Tag {
3434
kColumnFamily = 200, // specify column family for version edit
3535
kColumnFamilyAdd = 201,
3636
kColumnFamilyDrop = 202,
37+
kMaxColumnFamily = 203,
3738
};
3839

3940
void VersionEdit::Clear() {
@@ -43,11 +44,13 @@ void VersionEdit::Clear() {
4344
prev_log_number_ = 0;
4445
last_sequence_ = 0;
4546
next_file_number_ = 0;
47+
max_column_family_ = 0;
4648
has_comparator_ = false;
4749
has_log_number_ = false;
4850
has_prev_log_number_ = false;
4951
has_next_file_number_ = false;
5052
has_last_sequence_ = false;
53+
has_max_column_family_ = false;
5154
deleted_files_.clear();
5255
new_files_.clear();
5356
column_family_ = 0;
@@ -77,6 +80,10 @@ void VersionEdit::EncodeTo(std::string* dst) const {
7780
PutVarint32(dst, kLastSequence);
7881
PutVarint64(dst, last_sequence_);
7982
}
83+
if (has_max_column_family_) {
84+
PutVarint32(dst, kMaxColumnFamily);
85+
PutVarint32(dst, max_column_family_);
86+
}
8087

8188
for (const auto& deleted : deleted_files_) {
8289
PutVarint32(dst, kDeletedFile);
@@ -191,6 +198,14 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
191198
}
192199
break;
193200

201+
case kMaxColumnFamily:
202+
if (GetVarint32(&input, &max_column_family_)) {
203+
has_max_column_family_ = true;
204+
} else {
205+
msg = "max column family";
206+
}
207+
break;
208+
194209
case kCompactPointer:
195210
if (GetLevel(&input, &level, &msg) &&
196211
GetInternalKey(&input, &key)) {

db/version_edit.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ class VersionEdit {
7070
has_last_sequence_ = true;
7171
last_sequence_ = seq;
7272
}
73+
void SetMaxColumnFamily(uint32_t max_column_family) {
74+
has_max_column_family_ = true;
75+
max_column_family_ = max_column_family;
76+
}
7377

7478
// Add the specified file at the specified number.
7579
// REQUIRES: This version has not been saved (see VersionSet::SaveTo)
@@ -143,15 +147,17 @@ class VersionEdit {
143147
uint64_t log_number_;
144148
uint64_t prev_log_number_;
145149
uint64_t next_file_number_;
150+
uint32_t max_column_family_;
146151
SequenceNumber last_sequence_;
147152
bool has_comparator_;
148153
bool has_log_number_;
149154
bool has_prev_log_number_;
150155
bool has_next_file_number_;
151156
bool has_last_sequence_;
157+
bool has_max_column_family_;
152158

153159
DeletedFileSet deleted_files_;
154-
std::vector< std::pair<int, FileMetaData> > new_files_;
160+
std::vector<std::pair<int, FileMetaData>> new_files_;
155161

156162
// Each version edit record should have column_family_id set
157163
// If it's not set, it is default (0)

db/version_edit_test.cc

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ TEST(VersionEditTest, ColumnFamilyTest) {
4949
VersionEdit edit;
5050
edit.SetColumnFamily(2);
5151
edit.AddColumnFamily("column_family");
52+
edit.SetMaxColumnFamily(5);
5253
TestEncodeDecode(edit);
5354

5455
edit.Clear();

db/version_set.cc

+33-4
Original file line numberDiff line numberDiff line change
@@ -1497,6 +1497,9 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
14971497
return Status::OK();
14981498
}
14991499
if (edit->is_column_family_drop_) {
1500+
// if we drop column family, we have to make sure to save max column family,
1501+
// so that we don't reuse existing ID
1502+
edit->SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily());
15001503
column_family_data->SetDropped();
15011504
}
15021505

@@ -1789,6 +1792,7 @@ Status VersionSet::Recover(
17891792
uint64_t last_sequence = 0;
17901793
uint64_t log_number = 0;
17911794
uint64_t prev_log_number = 0;
1795+
uint32_t max_column_family = 0;
17921796
std::unordered_map<uint32_t, Builder*> builders;
17931797

17941798
// add default column family
@@ -1918,6 +1922,10 @@ Status VersionSet::Recover(
19181922
have_next_file = true;
19191923
}
19201924

1925+
if (edit.has_max_column_family_) {
1926+
max_column_family = edit.max_column_family_;
1927+
}
1928+
19211929
if (edit.has_last_sequence_) {
19221930
last_sequence = edit.last_sequence_;
19231931
have_last_sequence = true;
@@ -1938,6 +1946,8 @@ Status VersionSet::Recover(
19381946
prev_log_number = 0;
19391947
}
19401948

1949+
column_family_set_->UpdateMaxColumnFamily(max_column_family);
1950+
19411951
MarkFileNumberUsed(prev_log_number);
19421952
MarkFileNumberUsed(log_number);
19431953
}
@@ -1981,13 +1991,15 @@ Status VersionSet::Recover(
19811991
Log(options_->info_log, "Recovered from manifest file:%s succeeded,"
19821992
"manifest_file_number is %lu, next_file_number is %lu, "
19831993
"last_sequence is %lu, log_number is %lu,"
1984-
"prev_log_number is %lu\n",
1994+
"prev_log_number is %lu,"
1995+
"max_column_family is %u\n",
19851996
manifest_filename.c_str(),
19861997
(unsigned long)manifest_file_number_,
19871998
(unsigned long)next_file_number_,
19881999
(unsigned long)last_sequence_,
19892000
(unsigned long)log_number,
1990-
(unsigned long)prev_log_number_);
2001+
(unsigned long)prev_log_number_,
2002+
column_family_set_->GetMaxColumnFamily());
19912003

19922004
for (auto cfd : *column_family_set_) {
19932005
Log(options_->info_log,
@@ -2267,6 +2279,10 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
22672279
last_sequence = edit.last_sequence_;
22682280
have_last_sequence = true;
22692281
}
2282+
2283+
if (edit.has_max_column_family_) {
2284+
column_family_set_->UpdateMaxColumnFamily(edit.max_column_family_);
2285+
}
22702286
}
22712287
}
22722288
file.reset();
@@ -2315,9 +2331,10 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
23152331

23162332
printf(
23172333
"manifest_file_number %lu next_file_number %lu last_sequence "
2318-
"%lu prev_log_number %lu\n",
2334+
"%lu prev_log_number %lu max_column_family %u\n",
23192335
(unsigned long)manifest_file_number_, (unsigned long)next_file_number_,
2320-
(unsigned long)last_sequence, (unsigned long)prev_log_number);
2336+
(unsigned long)last_sequence, (unsigned long)prev_log_number,
2337+
column_family_set_->GetMaxColumnFamily());
23212338
}
23222339

23232340
return s;
@@ -2378,6 +2395,18 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
23782395
}
23792396
}
23802397

2398+
// save max column family to avoid reusing the same column
2399+
// family ID for two different column families
2400+
if (column_family_set_->GetMaxColumnFamily() > 0) {
2401+
VersionEdit edit;
2402+
edit.SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily());
2403+
std::string record;
2404+
edit.EncodeTo(&record);
2405+
Status s = log->AddRecord(record);
2406+
if (!s.ok()) {
2407+
return s;
2408+
}
2409+
}
23812410
return Status::OK();
23822411
}
23832412

0 commit comments

Comments
 (0)