Skip to content

Commit 169406c

Browse files
authored
Delete files in multiple ranges at once (facebook#13)
Summary: Using `DeleteFilesInRange` to delete files in a lot of ranges can be slow, because `VersionSet::LogAndApply` is expensive. This PR adds a new `DeleteFilesInRange` function to delete files in multiple ranges at once. Close facebook#2951 Closes facebook#3431 Differential Revision: D6849228 Pulled By: ajkr fbshipit-source-id: daeedcabd8def4b1d9ee95a58266dee77b5d68cb
1 parent bd9f08b commit 169406c

File tree

6 files changed

+191
-39
lines changed

6 files changed

+191
-39
lines changed

db/convenience.cc

+10-2
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,17 @@ void CancelAllBackgroundWork(DB* db, bool wait) {
1919
}
2020

2121
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
22-
const Slice* begin, const Slice* end) {
22+
const Slice* begin, const Slice* end,
23+
bool include_end) {
24+
RangePtr range(begin, end);
25+
return DeleteFilesInRanges(db, column_family, &range, 1, include_end);
26+
}
27+
28+
Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
29+
const RangePtr* ranges, size_t n,
30+
bool include_end) {
2331
return (static_cast_with_check<DBImpl, DB>(db->GetRootDB()))
24-
->DeleteFilesInRange(column_family, begin, end);
32+
->DeleteFilesInRanges(column_family, ranges, n, include_end);
2533
}
2634

2735
Status VerifySstFileChecksum(const Options& options,

db/db_compaction_test.cc

+116
Original file line numberDiff line numberDiff line change
@@ -1439,6 +1439,122 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
14391439
ASSERT_GT(old_num_files, new_num_files);
14401440
}
14411441

1442+
TEST_F(DBCompactionTest, DeleteFilesInRanges) {
1443+
Options options = CurrentOptions();
1444+
options.write_buffer_size = 10 * 1024 * 1024;
1445+
options.max_bytes_for_level_multiplier = 2;
1446+
options.num_levels = 4;
1447+
options.max_background_compactions = 3;
1448+
options.disable_auto_compactions = true;
1449+
1450+
DestroyAndReopen(options);
1451+
int32_t value_size = 10 * 1024; // 10 KB
1452+
1453+
Random rnd(301);
1454+
std::map<int32_t, std::string> values;
1455+
1456+
// file [0 => 100), [100 => 200), ... [900 => 1000)
1457+
for (auto i = 0; i < 10; i++) {
1458+
for (auto j = 0; j < 100; j++) {
1459+
auto k = i * 100 + j;
1460+
values[k] = RandomString(&rnd, value_size);
1461+
ASSERT_OK(Put(Key(k), values[k]));
1462+
}
1463+
ASSERT_OK(Flush());
1464+
}
1465+
ASSERT_EQ("10", FilesPerLevel(0));
1466+
CompactRangeOptions compact_options;
1467+
compact_options.change_level = true;
1468+
compact_options.target_level = 2;
1469+
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
1470+
ASSERT_EQ("0,0,10", FilesPerLevel(0));
1471+
1472+
// file [0 => 100), [200 => 300), ... [800 => 900)
1473+
for (auto i = 0; i < 10; i+=2) {
1474+
for (auto j = 0; j < 100; j++) {
1475+
auto k = i * 100 + j;
1476+
ASSERT_OK(Put(Key(k), values[k]));
1477+
}
1478+
ASSERT_OK(Flush());
1479+
}
1480+
ASSERT_EQ("5,0,10", FilesPerLevel(0));
1481+
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
1482+
ASSERT_EQ("0,5,10", FilesPerLevel(0));
1483+
1484+
// Delete files in range [0, 299] (inclusive)
1485+
{
1486+
auto begin_str1 = Key(0), end_str1 = Key(100);
1487+
auto begin_str2 = Key(100), end_str2 = Key(200);
1488+
auto begin_str3 = Key(200), end_str3 = Key(299);
1489+
Slice begin1(begin_str1), end1(end_str1);
1490+
Slice begin2(begin_str2), end2(end_str2);
1491+
Slice begin3(begin_str3), end3(end_str3);
1492+
std::vector<RangePtr> ranges;
1493+
ranges.push_back(RangePtr(&begin1, &end1));
1494+
ranges.push_back(RangePtr(&begin2, &end2));
1495+
ranges.push_back(RangePtr(&begin3, &end3));
1496+
ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(),
1497+
ranges.data(), ranges.size()));
1498+
ASSERT_EQ("0,3,7", FilesPerLevel(0));
1499+
1500+
// Keys [0, 300) should not exist.
1501+
for (auto i = 0; i < 300; i++) {
1502+
ReadOptions ropts;
1503+
std::string result;
1504+
auto s = db_->Get(ropts, Key(i), &result);
1505+
ASSERT_TRUE(s.IsNotFound());
1506+
}
1507+
for (auto i = 300; i < 1000; i++) {
1508+
ASSERT_EQ(Get(Key(i)), values[i]);
1509+
}
1510+
}
1511+
1512+
// Delete files in range [600, 999) (exclusive)
1513+
{
1514+
auto begin_str1 = Key(600), end_str1 = Key(800);
1515+
auto begin_str2 = Key(700), end_str2 = Key(900);
1516+
auto begin_str3 = Key(800), end_str3 = Key(999);
1517+
Slice begin1(begin_str1), end1(end_str1);
1518+
Slice begin2(begin_str2), end2(end_str2);
1519+
Slice begin3(begin_str3), end3(end_str3);
1520+
std::vector<RangePtr> ranges;
1521+
ranges.push_back(RangePtr(&begin1, &end1));
1522+
ranges.push_back(RangePtr(&begin2, &end2));
1523+
ranges.push_back(RangePtr(&begin3, &end3));
1524+
ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(),
1525+
ranges.data(), ranges.size(), false));
1526+
ASSERT_EQ("0,1,4", FilesPerLevel(0));
1527+
1528+
// Keys [600, 900) should not exist.
1529+
for (auto i = 600; i < 900; i++) {
1530+
ReadOptions ropts;
1531+
std::string result;
1532+
auto s = db_->Get(ropts, Key(i), &result);
1533+
ASSERT_TRUE(s.IsNotFound());
1534+
}
1535+
for (auto i = 300; i < 600; i++) {
1536+
ASSERT_EQ(Get(Key(i)), values[i]);
1537+
}
1538+
for (auto i = 900; i < 1000; i++) {
1539+
ASSERT_EQ(Get(Key(i)), values[i]);
1540+
}
1541+
}
1542+
1543+
// Delete all files.
1544+
{
1545+
RangePtr range;
1546+
ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), &range, 1));
1547+
ASSERT_EQ("", FilesPerLevel(0));
1548+
1549+
for (auto i = 0; i < 1000; i++) {
1550+
ReadOptions ropts;
1551+
std::string result;
1552+
auto s = db_->Get(ropts, Key(i), &result);
1553+
ASSERT_TRUE(s.IsNotFound());
1554+
}
1555+
}
1556+
}
1557+
14421558
TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
14431559
// regression test for #2833: groups of files whose user-keys overlap at the
14441560
// endpoints could be split by `DeleteFilesInRange`. This caused old data to

db/db_impl.cc

+45-34
Original file line numberDiff line numberDiff line change
@@ -2029,52 +2029,63 @@ Status DBImpl::DeleteFile(std::string name) {
20292029
return status;
20302030
}
20312031

2032-
Status DBImpl::DeleteFilesInRange(ColumnFamilyHandle* column_family,
2033-
const Slice* begin, const Slice* end) {
2032+
Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
2033+
const RangePtr* ranges, size_t n,
2034+
bool include_end) {
20342035
Status status;
20352036
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
20362037
ColumnFamilyData* cfd = cfh->cfd();
20372038
VersionEdit edit;
2038-
std::vector<FileMetaData*> deleted_files;
2039+
std::set<FileMetaData*> deleted_files;
20392040
JobContext job_context(next_job_id_.fetch_add(1), true);
20402041
{
20412042
InstrumentedMutexLock l(&mutex_);
20422043
Version* input_version = cfd->current();
20432044

20442045
auto* vstorage = input_version->storage_info();
2045-
for (int i = 1; i < cfd->NumberLevels(); i++) {
2046-
if (vstorage->LevelFiles(i).empty() ||
2047-
!vstorage->OverlapInLevel(i, begin, end)) {
2048-
continue;
2049-
}
2050-
std::vector<FileMetaData*> level_files;
2051-
InternalKey begin_storage, end_storage, *begin_key, *end_key;
2052-
if (begin == nullptr) {
2053-
begin_key = nullptr;
2054-
} else {
2055-
begin_storage.SetMaxPossibleForUserKey(*begin);
2056-
begin_key = &begin_storage;
2057-
}
2058-
if (end == nullptr) {
2059-
end_key = nullptr;
2060-
} else {
2061-
end_storage.SetMinPossibleForUserKey(*end);
2062-
end_key = &end_storage;
2063-
}
2064-
2065-
vstorage->GetCleanInputsWithinInterval(i, begin_key, end_key,
2066-
&level_files, -1 /* hint_index */,
2067-
nullptr /* file_index */);
2068-
FileMetaData* level_file;
2069-
for (uint32_t j = 0; j < level_files.size(); j++) {
2070-
level_file = level_files[j];
2071-
if (level_file->being_compacted) {
2046+
for (size_t r = 0; r < n; r++) {
2047+
auto begin = ranges[r].start, end = ranges[r].limit;
2048+
for (int i = 1; i < cfd->NumberLevels(); i++) {
2049+
if (vstorage->LevelFiles(i).empty() ||
2050+
!vstorage->OverlapInLevel(i, begin, end)) {
20722051
continue;
20732052
}
2074-
edit.SetColumnFamily(cfd->GetID());
2075-
edit.DeleteFile(i, level_file->fd.GetNumber());
2076-
deleted_files.push_back(level_file);
2077-
level_file->being_compacted = true;
2053+
std::vector<FileMetaData*> level_files;
2054+
InternalKey begin_storage, end_storage, *begin_key, *end_key;
2055+
if (begin == nullptr) {
2056+
begin_key = nullptr;
2057+
} else {
2058+
begin_storage.SetMinPossibleForUserKey(*begin);
2059+
begin_key = &begin_storage;
2060+
}
2061+
if (end == nullptr) {
2062+
end_key = nullptr;
2063+
} else {
2064+
end_storage.SetMaxPossibleForUserKey(*end);
2065+
end_key = &end_storage;
2066+
}
2067+
2068+
vstorage->GetCleanInputsWithinInterval(i, begin_key, end_key,
2069+
&level_files, -1 /* hint_index */,
2070+
nullptr /* file_index */);
2071+
FileMetaData* level_file;
2072+
for (uint32_t j = 0; j < level_files.size(); j++) {
2073+
level_file = level_files[j];
2074+
if (level_file->being_compacted) {
2075+
continue;
2076+
}
2077+
if (deleted_files.find(level_file) != deleted_files.end()) {
2078+
continue;
2079+
}
2080+
if (!include_end && end != nullptr &&
2081+
cfd->user_comparator()->Compare(level_file->largest.user_key(), *end) == 0) {
2082+
continue;
2083+
}
2084+
edit.SetColumnFamily(cfd->GetID());
2085+
edit.DeleteFile(i, level_file->fd.GetNumber());
2086+
deleted_files.insert(level_file);
2087+
level_file->being_compacted = true;
2088+
}
20782089
}
20792090
}
20802091
if (edit.GetDeletedFiles().empty()) {

db/db_impl.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,9 @@ class DBImpl : public DB {
237237
const TransactionLogIterator::ReadOptions&
238238
read_options = TransactionLogIterator::ReadOptions()) override;
239239
virtual Status DeleteFile(std::string name) override;
240-
Status DeleteFilesInRange(ColumnFamilyHandle* column_family,
241-
const Slice* begin, const Slice* end);
240+
Status DeleteFilesInRanges(ColumnFamilyHandle* column_family,
241+
const RangePtr* ranges, size_t n,
242+
bool include_end = true);
242243

243244
virtual void GetLiveFilesMetaData(
244245
std::vector<LiveFileMetaData>* metadata) override;

include/rocksdb/convenience.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,15 @@ void CancelAllBackgroundWork(DB* db, bool wait = false);
329329
// in the range.
330330
// Snapshots before the delete might not see the data in the given range.
331331
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
332-
const Slice* begin, const Slice* end);
332+
const Slice* begin, const Slice* end,
333+
bool include_end = true);
334+
335+
// Delete files in multiple ranges at once
336+
// Deleting files in a lot of ranges one at a time can be slow; use this API for
337+
// better performance in that case.
338+
Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
339+
const RangePtr* ranges, size_t n,
340+
bool include_end = true);
333341

334342
// Verify the checksum of file
335343
Status VerifySstFileChecksum(const Options& options,

include/rocksdb/db.h

+8
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,14 @@ struct Range {
9999
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
100100
};
101101

102+
struct RangePtr {
103+
const Slice* start;
104+
const Slice* limit;
105+
106+
RangePtr() : start(nullptr), limit(nullptr) { }
107+
RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) { }
108+
};
109+
102110
// A collection of table properties objects, where
103111
// key: is the table's file name.
104112
// value: the table properties object of the given table.

0 commit comments

Comments
 (0)