Skip to content

Commit c583157

Browse files
committed
MemTableListVersion
Summary: MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful. This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team. I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet()) Test Plan: `make check` works. I will also do `make valgrind_check` before commit Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak Reviewed By: kailiu CC: leveldb Differential Revision: https://reviews.facebook.net/D15255
1 parent f131d4c commit c583157

File tree

4 files changed

+184
-129
lines changed

4 files changed

+184
-129
lines changed

db/db_impl.cc

+42-52
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
264264
bg_cv_(&mutex_),
265265
mem_rep_factory_(options_.memtable_factory.get()),
266266
mem_(new MemTable(internal_comparator_, options_)),
267+
imm_(options_.min_write_buffer_number_to_merge),
267268
logfile_number_(0),
268269
super_version_(nullptr),
269270
super_version_number_(0),
@@ -360,7 +361,7 @@ DBImpl::~DBImpl() {
360361
delete mem_->Unref();
361362
}
362363

363-
imm_.UnrefAll(&to_delete);
364+
imm_.current()->Unref(&to_delete);
364365
for (MemTable* m: to_delete) {
365366
delete m;
366367
}
@@ -508,21 +509,21 @@ bool DBImpl::SuperVersion::Unref() {
508509

509510
void DBImpl::SuperVersion::Cleanup() {
510511
assert(refs.load(std::memory_order_relaxed) == 0);
511-
imm.UnrefAll(&to_delete);
512+
imm->Unref(&to_delete);
512513
MemTable* m = mem->Unref();
513514
if (m != nullptr) {
514515
to_delete.push_back(m);
515516
}
516517
current->Unref();
517518
}
518519

519-
void DBImpl::SuperVersion::Init(MemTable* new_mem, const MemTableList& new_imm,
520+
void DBImpl::SuperVersion::Init(MemTable* new_mem, MemTableListVersion* new_imm,
520521
Version* new_current) {
521522
mem = new_mem;
522523
imm = new_imm;
523524
current = new_current;
524525
mem->Ref();
525-
imm.RefAll();
526+
imm->Ref();
526527
current->Ref();
527528
refs.store(1, std::memory_order_relaxed);
528529
}
@@ -1221,7 +1222,7 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
12211222
mutex_.AssertHeld();
12221223
assert(imm_.size() != 0);
12231224

1224-
if (!imm_.IsFlushPending(options_.min_write_buffer_number_to_merge)) {
1225+
if (!imm_.IsFlushPending()) {
12251226
Log(options_.info_log, "FlushMemTableToOutputFile already in progress");
12261227
Status s = Status::IOError("FlushMemTableToOutputFile already in progress");
12271228
return s;
@@ -1762,8 +1763,7 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
17621763
} else if (shutting_down_.Acquire_Load()) {
17631764
// DB is being deleted; no more background compactions
17641765
} else {
1765-
bool is_flush_pending =
1766-
imm_.IsFlushPending(options_.min_write_buffer_number_to_merge);
1766+
bool is_flush_pending = imm_.IsFlushPending();
17671767
if (is_flush_pending &&
17681768
(bg_flush_scheduled_ < options_.max_background_flushes)) {
17691769
// memtable flush needed
@@ -1798,8 +1798,7 @@ void DBImpl::BGWorkCompaction(void* db) {
17981798
Status DBImpl::BackgroundFlush(bool* madeProgress,
17991799
DeletionState& deletion_state) {
18001800
Status stat;
1801-
while (stat.ok() &&
1802-
imm_.IsFlushPending(options_.min_write_buffer_number_to_merge)) {
1801+
while (stat.ok() && imm_.IsFlushPending()) {
18031802
Log(options_.info_log,
18041803
"BackgroundCallFlush doing FlushMemTableToOutputFile, flush slots available %d",
18051804
options_.max_background_flushes - bg_flush_scheduled_);
@@ -1919,7 +1918,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
19191918
mutex_.AssertHeld();
19201919

19211920
// TODO: remove memtable flush from formal compaction
1922-
while (imm_.IsFlushPending(options_.min_write_buffer_number_to_merge)) {
1921+
while (imm_.IsFlushPending()) {
19231922
Log(options_.info_log,
19241923
"BackgroundCompaction doing FlushMemTableToOutputFile, compaction slots "
19251924
"available %d",
@@ -2325,7 +2324,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
23252324
const uint64_t imm_start = env_->NowMicros();
23262325
LogFlush(options_.info_log);
23272326
mutex_.Lock();
2328-
if (imm_.IsFlushPending(options_.min_write_buffer_number_to_merge)) {
2327+
if (imm_.IsFlushPending()) {
23292328
FlushMemTableToOutputFile(nullptr, deletion_state);
23302329
bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary
23312330
}
@@ -2658,8 +2657,9 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
26582657
namespace {
26592658
struct IterState {
26602659
port::Mutex* mu;
2661-
Version* version;
2662-
std::vector<MemTable*> mem; // includes both mem_ and imm_
2660+
Version* version = nullptr;
2661+
MemTable* mem = nullptr;
2662+
MemTableListVersion* imm = nullptr;
26632663
DBImpl *db;
26642664
};
26652665

@@ -2668,15 +2668,16 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
26682668
DBImpl::DeletionState deletion_state(state->db->GetOptions().
26692669
max_write_buffer_number);
26702670
state->mu->Lock();
2671-
for (unsigned int i = 0; i < state->mem.size(); i++) {
2672-
MemTable* m = state->mem[i]->Unref();
2673-
if (m != nullptr) {
2674-
deletion_state.memtables_to_free.push_back(m);
2675-
}
2671+
MemTable* m = state->mem->Unref();
2672+
if (m != nullptr) {
2673+
deletion_state.memtables_to_free.push_back(m);
26762674
}
26772675
if (state->version) { // not set for memtable-only iterator
26782676
state->version->Unref();
26792677
}
2678+
if (state->imm) { // not set for memtable-only iterator
2679+
state->imm->Unref(&deletion_state.memtables_to_free);
2680+
}
26802681
// fast path FindObsoleteFiles
26812682
state->db->FindObsoleteFiles(deletion_state, false, true);
26822683
state->mu->Unlock();
@@ -2690,7 +2691,7 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
26902691
SequenceNumber* latest_snapshot) {
26912692
IterState* cleanup = new IterState;
26922693
MemTable* mutable_mem;
2693-
std::vector<MemTable*> immutables;
2694+
MemTableListVersion* immutable_mems;
26942695
Version* version;
26952696

26962697
// Collect together all needed child iterators for mem
@@ -2699,27 +2700,22 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
26992700
mem_->Ref();
27002701
mutable_mem = mem_;
27012702
// Collect together all needed child iterators for imm_
2702-
imm_.GetMemTables(&immutables);
2703-
for (unsigned int i = 0; i < immutables.size(); i++) {
2704-
immutables[i]->Ref();
2705-
}
2703+
immutable_mems = imm_.current();
2704+
immutable_mems->Ref();
27062705
versions_->current()->Ref();
27072706
version = versions_->current();
27082707
mutex_.Unlock();
27092708

2710-
std::vector<Iterator*> list;
2711-
list.push_back(mutable_mem->NewIterator(options));
2712-
cleanup->mem.push_back(mutable_mem);
2713-
2709+
std::vector<Iterator*> iterator_list;
2710+
iterator_list.push_back(mutable_mem->NewIterator(options));
2711+
cleanup->mem = mutable_mem;
2712+
cleanup->imm = immutable_mems;
27142713
// Collect all needed child iterators for immutable memtables
2715-
for (MemTable* m : immutables) {
2716-
list.push_back(m->NewIterator(options));
2717-
cleanup->mem.push_back(m);
2718-
}
2714+
immutable_mems->AddIterators(options, &iterator_list);
27192715
// Collect iterators for files in L0 - Ln
2720-
version->AddIterators(options, storage_options_, &list);
2721-
Iterator* internal_iter =
2722-
NewMergingIterator(&internal_comparator_, &list[0], list.size());
2716+
version->AddIterators(options, storage_options_, &iterator_list);
2717+
Iterator* internal_iter = NewMergingIterator(
2718+
&internal_comparator_, &iterator_list[0], iterator_list.size());
27232719
cleanup->version = version;
27242720
cleanup->mu = &mutex_;
27252721
cleanup->db = this;
@@ -2738,19 +2734,15 @@ std::pair<Iterator*, Iterator*> DBImpl::GetTailingIteratorPair(
27382734
uint64_t* superversion_number) {
27392735

27402736
MemTable* mutable_mem;
2741-
std::vector<MemTable*> immutables;
2737+
MemTableListVersion* immutable_mems;
27422738
Version* version;
27432739

2744-
immutables.reserve(options_.max_write_buffer_number);
2745-
27462740
// get all child iterators and bump their refcounts under lock
27472741
mutex_.Lock();
27482742
mutable_mem = mem_;
27492743
mutable_mem->Ref();
2750-
imm_.GetMemTables(&immutables);
2751-
for (size_t i = 0; i < immutables.size(); ++i) {
2752-
immutables[i]->Ref();
2753-
}
2744+
immutable_mems = imm_.current();
2745+
immutable_mems->Ref();
27542746
version = versions_->current();
27552747
version->Ref();
27562748
if (superversion_number != nullptr) {
@@ -2760,7 +2752,7 @@ std::pair<Iterator*, Iterator*> DBImpl::GetTailingIteratorPair(
27602752

27612753
Iterator* mutable_iter = mutable_mem->NewIterator(options);
27622754
IterState* mutable_cleanup = new IterState();
2763-
mutable_cleanup->mem.push_back(mutable_mem);
2755+
mutable_cleanup->mem = mutable_mem;
27642756
mutable_cleanup->db = this;
27652757
mutable_cleanup->mu = &mutex_;
27662758
mutable_iter->RegisterCleanup(CleanupIteratorState, mutable_cleanup, nullptr);
@@ -2772,10 +2764,8 @@ std::pair<Iterator*, Iterator*> DBImpl::GetTailingIteratorPair(
27722764
Iterator* immutable_iter;
27732765
IterState* immutable_cleanup = new IterState();
27742766
std::vector<Iterator*> list;
2775-
for (MemTable* m : immutables) {
2776-
list.push_back(m->NewIterator(options));
2777-
immutable_cleanup->mem.push_back(m);
2778-
}
2767+
immutable_mems->AddIterators(options, &list);
2768+
immutable_cleanup->imm = immutable_mems;
27792769
version->AddIterators(options, storage_options_, &list);
27802770
immutable_cleanup->version = version;
27812771
immutable_cleanup->db = this;
@@ -2832,7 +2822,7 @@ void DBImpl::InstallSuperVersion(DeletionState& deletion_state) {
28322822
DBImpl::SuperVersion* DBImpl::InstallSuperVersion(
28332823
SuperVersion* new_superversion) {
28342824
mutex_.AssertHeld();
2835-
new_superversion->Init(mem_, imm_, versions_->current());
2825+
new_superversion->Init(mem_, imm_.current(), versions_->current());
28362826
SuperVersion* old_superversion = super_version_;
28372827
super_version_ = new_superversion;
28382828
++super_version_number_;
@@ -2875,7 +2865,7 @@ Status DBImpl::GetImpl(const ReadOptions& options,
28752865
if (get_version->mem->Get(lkey, value, &s, merge_context, options_)) {
28762866
// Done
28772867
RecordTick(options_.statistics.get(), MEMTABLE_HIT);
2878-
} else if (get_version->imm.Get(lkey, value, &s, merge_context, options_)) {
2868+
} else if (get_version->imm->Get(lkey, value, &s, merge_context, options_)) {
28792869
// Done
28802870
RecordTick(options_.statistics.get(), MEMTABLE_HIT);
28812871
} else {
@@ -2930,10 +2920,10 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
29302920
}
29312921

29322922
MemTable* mem = mem_;
2933-
MemTableList imm = imm_;
2923+
MemTableListVersion* imm = imm_.current();
29342924
Version* current = versions_->current();
29352925
mem->Ref();
2936-
imm.RefAll();
2926+
imm->Ref();
29372927
current->Ref();
29382928

29392929
// Unlock while reading from files and memtables
@@ -2965,7 +2955,7 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
29652955
LookupKey lkey(keys[i], snapshot);
29662956
if (mem->Get(lkey, value, &s, merge_context, options_)) {
29672957
// Done
2968-
} else if (imm.Get(lkey, value, &s, merge_context, options_)) {
2958+
} else if (imm->Get(lkey, value, &s, merge_context, options_)) {
29692959
// Done
29702960
} else {
29712961
current->Get(options, lkey, value, &s, &merge_context, &stats, options_);
@@ -2984,7 +2974,7 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
29842974
MaybeScheduleFlushOrCompaction();
29852975
}
29862976
MemTable* m = mem->Unref();
2987-
imm.UnrefAll(&to_delete);
2977+
imm->Unref(&to_delete);
29882978
current->Unref();
29892979
mutex_.Unlock();
29902980

db/db_impl.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,10 @@ class DBImpl : public DB {
140140
// holds references to memtable, all immutable memtables and version
141141
struct SuperVersion {
142142
MemTable* mem;
143-
MemTableList imm;
143+
MemTableListVersion* imm;
144144
Version* current;
145145
std::atomic<uint32_t> refs;
146-
// We need to_delete because during Cleanup(), imm.UnrefAll() returns
146+
// We need to_delete because during Cleanup(), imm->Unref() returns
147147
// all memtables that we need to free through this vector. We then
148148
// delete all those memtables outside of mutex, during destruction
149149
std::vector<MemTable*> to_delete;
@@ -161,7 +161,7 @@ class DBImpl : public DB {
161161
// that needs to be deleted in to_delete vector. Unrefing those
162162
// objects needs to be done in the mutex
163163
void Cleanup();
164-
void Init(MemTable* new_mem, const MemTableList& new_imm,
164+
void Init(MemTable* new_mem, MemTableListVersion* new_imm,
165165
Version* new_current);
166166
};
167167

0 commit comments

Comments
 (0)