Commit bd45633
Fix data race against logging data structure because of LogBuffer
Summary: @igor pointed out that there is a potential data race in the way we use the newly introduced LogBuffer. After "bg_compaction_scheduled_--" or "bg_flush_scheduled_--", both counters can become 0. As soon as the lock is released after that, DBImpl's destructor can go ahead and destroy all the state inside DB, including the info_log object held in a shared pointer of the options object it keeps. At that point it is no longer safe to keep using the info logger to write the delayed logs. With the patch, the lock is released temporarily so the log buffer can be flushed before "bg_compaction_scheduled_--" or "bg_flush_scheduled_--". To make sure we don't miss any pending flush or compaction, a new flag bg_schedule_needed_ is added; it is set to true if a flush or compaction is pending but was not scheduled because of the max thread limit. If the flag is true, the scheduling function is called again before the compaction or flush thread finishes. Thanks @igor for the finding!

Test Plan: make all check

Reviewers: haobo, igor

Reviewed By: haobo

CC: dhruba, ljin, yhchiang, igor, leveldb

Differential Revision: https://reviews.facebook.net/D16767
1 parent 02dab3b · commit bd45633

3 files changed: +51, -15 lines
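To make the ordering constraint concrete, here is a minimal, self-contained C++ sketch of the pattern this patch enforces. All names (BufferedLogs, DbSketch, BackgroundJobDone, scheduled_jobs_, reschedule_needed_) are hypothetical simplifications, not RocksDB APIs; the real logic lives in DBImpl::BackgroundCallFlush() and DBImpl::BackgroundCallCompaction() in the diff below.

#include <mutex>
#include <string>
#include <vector>

// Toy stand-in for the LogBuffer passed to background threads.
struct BufferedLogs {
  std::vector<std::string> entries;
  bool IsEmpty() const { return entries.empty(); }
  void FlushToInfoLog() { /* write entries to the DB-owned info logger */ entries.clear(); }
};

class DbSketch {
 public:
  // Called at the end of a background flush/compaction job, with simplified names.
  void BackgroundJobDone(BufferedLogs* log_buffer, bool made_progress) {
    std::unique_lock<std::mutex> lock(mutex_);
    if (!log_buffer->IsEmpty()) {
      // Flush the buffered info logs BEFORE decrementing scheduled_jobs_:
      // once the count reaches zero and the mutex is released, the DB
      // destructor may run and destroy the logger these entries go to.
      lock.unlock();
      log_buffer->FlushToInfoLog();
      lock.lock();
    }
    scheduled_jobs_--;
    // While the mutex was released, another thread may have found pending
    // work but skipped scheduling it because of the max thread limit, so
    // honor reschedule_needed_ in addition to made_progress.
    if (made_progress || reschedule_needed_) {
      MaybeScheduleWork();
    }
  }

 private:
  void MaybeScheduleWork() { reschedule_needed_ = false; /* schedule if below the limit */ }

  std::mutex mutex_;
  int scheduled_jobs_ = 1;
  bool reschedule_needed_ = false;
};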

db/db_impl.cc (+45, -15)
@@ -276,6 +276,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
       super_version_number_(0),
       local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)),
       tmp_batch_(),
+      bg_schedule_needed_(false),
       bg_compaction_scheduled_(0),
       bg_manual_only_(0),
       bg_flush_scheduled_(0),
@@ -1830,17 +1831,21 @@ Status DBImpl::TEST_WaitForCompact() {
 
 void DBImpl::MaybeScheduleFlushOrCompaction() {
   mutex_.AssertHeld();
+  bg_schedule_needed_ = false;
   if (bg_work_gate_closed_) {
     // gate closed for backgrond work
   } else if (shutting_down_.Acquire_Load()) {
     // DB is being deleted; no more background compactions
   } else {
     bool is_flush_pending = imm_.IsFlushPending();
-    if (is_flush_pending &&
-        (bg_flush_scheduled_ < options_.max_background_flushes)) {
-      // memtable flush needed
-      bg_flush_scheduled_++;
-      env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
+    if (is_flush_pending) {
+      if (bg_flush_scheduled_ < options_.max_background_flushes) {
+        // memtable flush needed
+        bg_flush_scheduled_++;
+        env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
+      } else {
+        bg_schedule_needed_ = true;
+      }
     }
 
     // Schedule BGWorkCompaction if there's a compaction pending (or a memtable
@@ -1850,11 +1855,13 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
     if ((manual_compaction_ ||
          versions_->current()->NeedsCompaction() ||
          (is_flush_pending && (options_.max_background_flushes <= 0))) &&
-        bg_compaction_scheduled_ < options_.max_background_compactions &&
         (!bg_manual_only_ || manual_compaction_)) {
-
-      bg_compaction_scheduled_++;
-      env_->Schedule(&DBImpl::BGWorkCompaction, this, Env::Priority::LOW);
+      if (bg_compaction_scheduled_ < options_.max_background_compactions) {
+        bg_compaction_scheduled_++;
+        env_->Schedule(&DBImpl::BGWorkCompaction, this, Env::Priority::LOW);
+      } else {
+        bg_schedule_needed_ = true;
+      }
     }
   }
 }
@@ -1912,15 +1919,26 @@ void DBImpl::BackgroundCallFlush() {
     // to delete all obsolete files and we force FindObsoleteFiles()
     FindObsoleteFiles(deletion_state, !s.ok());
     // delete unnecessary files if any, this is done outside the mutex
-    if (deletion_state.HaveSomethingToDelete()) {
+    if (deletion_state.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
       mutex_.Unlock();
+      // Have to flush the info logs before bg_flush_scheduled_--
+      // because if bg_flush_scheduled_ becomes 0 and the lock is
+      // released, the deconstructor of DB can kick in and destroy all the
+      // states of DB so info_log might not be available after that point.
+      // It also applies to access other states that DB owns.
       log_buffer.FlushBufferToLog();
-      PurgeObsoleteFiles(deletion_state);
+      if (deletion_state.HaveSomethingToDelete()) {
+        PurgeObsoleteFiles(deletion_state);
+      }
       mutex_.Lock();
     }
 
     bg_flush_scheduled_--;
-    if (madeProgress) {
+    // Any time the mutex is released After finding the work to do, another
+    // thread might execute MaybeScheduleFlushOrCompaction(). It is possible
+    // that there is a pending job but it is not scheduled because of the
+    // max thread limit.
+    if (madeProgress || bg_schedule_needed_) {
      MaybeScheduleFlushOrCompaction();
     }
     log_buffer.FlushBufferToLog();
@@ -1979,10 +1997,17 @@ void DBImpl::BackgroundCallCompaction() {
     FindObsoleteFiles(deletion_state, !s.ok());
 
     // delete unnecessary files if any, this is done outside the mutex
-    if (deletion_state.HaveSomethingToDelete()) {
+    if (deletion_state.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
       mutex_.Unlock();
+      // Have to flush the info logs before bg_compaction_scheduled_--
+      // because if bg_flush_scheduled_ becomes 0 and the lock is
+      // released, the deconstructor of DB can kick in and destroy all the
+      // states of DB so info_log might not be available after that point.
+      // It also applies to access other states that DB owns.
       log_buffer.FlushBufferToLog();
-      PurgeObsoleteFiles(deletion_state);
+      if (deletion_state.HaveSomethingToDelete()) {
+        PurgeObsoleteFiles(deletion_state);
+      }
       mutex_.Lock();
     }
 
@@ -1993,7 +2018,12 @@ void DBImpl::BackgroundCallCompaction() {
     // Previous compaction may have produced too many files in a level,
     // So reschedule another compaction if we made progress in the
     // last compaction.
-    if (madeProgress) {
+    //
+    // Also, any time the mutex is released After finding the work to do,
+    // another thread might execute MaybeScheduleFlushOrCompaction(). It is
+    // possible that there is a pending job but it is not scheduled because of
+    // the max thread limit.
+    if (madeProgress || bg_schedule_needed_) {
       MaybeScheduleFlushOrCompaction();
     }
     log_buffer.FlushBufferToLog();

db/db_impl.h (+4)
@@ -454,6 +454,10 @@ class DBImpl : public DB {
   // part of ongoing compactions.
   std::set<uint64_t> pending_outputs_;
 
+  // At least one compaction or flush job is pending but not yet scheduled
+  // because of the max background thread limit.
+  bool bg_schedule_needed_;
+
   // count how many background compactions are running or have been scheduled
   int bg_compaction_scheduled_;
 

util/log_buffer.h (+2)
@@ -23,6 +23,8 @@ class LogBuffer {
   // Add a log entry to the buffer.
   void AddLogToBuffer(const char* format, va_list ap);
 
+  size_t IsEmpty() const { return logs_.empty(); }
+
   // Flush all buffered log to the info log.
   void FlushBufferToLog();
 
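For context, here is a hedged, self-contained sketch of the LogBuffer idea (a toy, not the RocksDB implementation): entries are formatted into memory while the DB mutex may be held, and IsEmpty()/FlushBufferToLog() let the caller decide when it is safe to write them to the real info logger. Only the three member names mirror the header above; everything else is illustrative.

#include <cstdarg>
#include <cstdio>
#include <string>
#include <vector>

// Toy stand-in for LogBuffer; stderr plays the role of the info logger.
class ToyLogBuffer {
 public:
  bool IsEmpty() const { return logs_.empty(); }

  // Format the message now, but defer the actual write.
  void AddLogToBuffer(const char* format, va_list ap) {
    char msg[512];
    vsnprintf(msg, sizeof(msg), format, ap);
    logs_.emplace_back(msg);
  }

  // Write everything out once no lock-ordering or lifetime issue remains.
  void FlushBufferToLog() {
    for (const std::string& entry : logs_) {
      std::fprintf(stderr, "%s\n", entry.c_str());
    }
    logs_.clear();
  }

 private:
  std::vector<std::string> logs_;
};

// Varargs convenience wrapper, similar in spirit to the helper RocksDB
// provides for logging into a LogBuffer.
void BufferedLog(ToyLogBuffer* buf, const char* format, ...) {
  va_list ap;
  va_start(ap, format);
  buf->AddLogToBuffer(format, ap);
  va_end(ap);
}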
