Skip to content

Commit 0b4ccf7

Browse files
committed
Flushes should always go to HIGH priority thread pool
Summary: This is not a column-family-related diff. It is in the columnfamily branch because the change is significant and we want to push it with the next major release (3.0). It removes the leveldb notion of one thread pool and expands it to two thread pools by default (HIGH and LOW). The flush process is removed from the compaction process and all flush threads are executed on the HIGH thread pool, since we don't want long-running compactions to influence flush latency. Test Plan: make check Reviewers: dhruba, haobo, kailiu, sdong CC: leveldb Differential Revision: https://reviews.facebook.net/D15987
1 parent f8d5443 commit 0b4ccf7

File tree

4 files changed

+34
-57
lines changed

4 files changed

+34
-57
lines changed

HISTORY.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# Rocksdb Change Log
22

3-
## Unreleased
3+
## Unreleased (will be released in 3.0)
4+
* By default, max_background_flushes is 1 and flush process is
5+
removed from background compaction process. Flush process is now always
6+
executed in high priority thread pool.
7+
8+
## Unreleased (will be released in 2.8)
49
* By default, checksums are verified on every read from database
510

611

db/db_impl.cc

+16-52
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ Options SanitizeOptions(const std::string& dbname,
142142
DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
143143
DBOptions result = src;
144144
ClipToRange(&result.max_open_files, 20, 1000000);
145+
if (result.max_background_flushes == 0) {
146+
result.max_background_flushes = 1;
147+
}
145148

146149
if (result.info_log == nullptr) {
147150
Status s = CreateLoggerFromOptions(dbname, result.db_log_dir, src.env,
@@ -1704,11 +1707,15 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
17041707
is_flush_pending = true;
17051708
}
17061709
}
1707-
if (is_flush_pending &&
1708-
(bg_flush_scheduled_ < options_.max_background_flushes)) {
1710+
if (is_flush_pending) {
17091711
// memtable flush needed
1710-
bg_flush_scheduled_++;
1711-
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
1712+
// max_background_flushes should not be 0, because that means
1713+
// flush will never get executed
1714+
assert(options_.max_background_flushes != 0);
1715+
if (bg_flush_scheduled_ < options_.max_background_flushes) {
1716+
bg_flush_scheduled_++;
1717+
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
1718+
}
17121719
}
17131720
bool is_compaction_needed = false;
17141721
for (auto cfd : *versions_->GetColumnFamilySet()) {
@@ -1718,12 +1725,10 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
17181725
}
17191726
}
17201727

1721-
// Schedule BGWorkCompaction if there's a compaction pending (or a memtable
1722-
// flush, but the HIGH pool is not enabled). Do it only if
1723-
// max_background_compactions hasn't been reached and, in case
1728+
// Schedule BGWorkCompaction if there's a compaction pending.
1729+
// Do it only if max_background_compactions hasn't been reached and, in case
17241730
// bg_manual_only_ > 0, if it's a manual compaction.
1725-
if ((manual_compaction_ || is_compaction_needed ||
1726-
(is_flush_pending && (options_.max_background_flushes <= 0))) &&
1731+
if ((manual_compaction_ || is_compaction_needed) &&
17271732
bg_compaction_scheduled_ < options_.max_background_compactions &&
17281733
(!bg_manual_only_ || manual_compaction_)) {
17291734

@@ -1868,41 +1873,14 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
18681873
*madeProgress = false;
18691874
mutex_.AssertHeld();
18701875

1876+
unique_ptr<Compaction> c;
18711877
bool is_manual = (manual_compaction_ != nullptr) &&
18721878
(manual_compaction_->in_progress == false);
1873-
if (is_manual) {
1874-
// another thread cannot pick up the same work
1875-
manual_compaction_->in_progress = true;
1876-
}
1877-
1878-
// TODO: remove memtable flush from formal compaction
1879-
for (auto cfd : *versions_->GetColumnFamilySet()) {
1880-
while (cfd->imm()->IsFlushPending()) {
1881-
Log(options_.info_log,
1882-
"BackgroundCompaction doing FlushMemTableToOutputFile with column "
1883-
"family %d, compaction slots available %d",
1884-
cfd->GetID(),
1885-
options_.max_background_compactions - bg_compaction_scheduled_);
1886-
Status stat =
1887-
FlushMemTableToOutputFile(cfd, madeProgress, deletion_state);
1888-
if (!stat.ok()) {
1889-
if (is_manual) {
1890-
manual_compaction_->status = stat;
1891-
manual_compaction_->done = true;
1892-
manual_compaction_->in_progress = false;
1893-
manual_compaction_ = nullptr;
1894-
}
1895-
return stat;
1896-
}
1897-
}
1898-
}
1899-
1900-
unique_ptr<Compaction> c;
19011879
InternalKey manual_end_storage;
19021880
InternalKey* manual_end = &manual_end_storage;
19031881
if (is_manual) {
19041882
ManualCompaction* m = manual_compaction_;
1905-
assert(m->in_progress);
1883+
m->in_progress = true;
19061884
c.reset(m->cfd->CompactRange(m->input_level, m->output_level, m->begin,
19071885
m->end, &manual_end));
19081886
if (!c) {
@@ -2299,20 +2277,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
22992277
}
23002278

23012279
for (; input->Valid() && !shutting_down_.Acquire_Load(); ) {
2302-
// Prioritize immutable compaction work
2303-
// TODO: remove memtable flush from normal compaction work
2304-
if (cfd->imm()->imm_flush_needed.NoBarrier_Load() != nullptr) {
2305-
const uint64_t imm_start = env_->NowMicros();
2306-
LogFlush(options_.info_log);
2307-
mutex_.Lock();
2308-
if (cfd->imm()->IsFlushPending()) {
2309-
FlushMemTableToOutputFile(cfd, nullptr, deletion_state);
2310-
bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary
2311-
}
2312-
mutex_.Unlock();
2313-
imm_micros += (env_->NowMicros() - imm_start);
2314-
}
2315-
23162280
Slice key = input->key();
23172281
Slice value = input->value();
23182282

include/rocksdb/options.h

+11-3
Original file line numberDiff line numberDiff line change
@@ -526,21 +526,29 @@ struct DBOptions {
526526
// regardless of this setting
527527
uint64_t delete_obsolete_files_period_micros;
528528

529-
// Maximum number of concurrent background jobs, submitted to
530-
// the default LOW priority thread pool
529+
// Maximum number of concurrent background compaction jobs, submitted to
530+
// the default LOW priority thread pool.
531+
// If you're increasing this, also consider increasing number of threads in
532+
// LOW priority thread pool. For more information, see
533+
// Env::SetBackgroundThreads
531534
// Default: 1
532535
int max_background_compactions;
533536

534537
// Maximum number of concurrent background memtable flush jobs, submitted to
535538
// the HIGH priority thread pool.
539+
//
536540
// Memtable flush jobs are always submitted to the HIGH priority pool, while
537541
// compaction jobs go to the LOW priority pool. A value of 0 is sanitized
538542
// to 1, so at least one flush job can always be scheduled.
539543
// It is important when the same Env is shared by multiple db instances.
540544
// Without a separate pool, long running major compaction jobs could
541545
// potentially block memtable flush jobs of other db instances, leading to
542546
// unnecessary Put stalls.
543-
// Default: 0
547+
//
548+
// If you're increasing this, also consider increasing number of threads in
549+
// HIGH priority thread pool. For more information, see
550+
// Env::SetBackgroundThreads
551+
// Default: 1
544552
int max_background_flushes;
545553

546554
// Specify the maximal size of the info log file. If the log file

util/options.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ DBOptions::DBOptions()
150150
wal_dir(""),
151151
delete_obsolete_files_period_micros(6 * 60 * 60 * 1000000UL),
152152
max_background_compactions(1),
153-
max_background_flushes(0),
153+
max_background_flushes(1),
154154
max_log_file_size(0),
155155
log_file_time_to_roll(0),
156156
keep_log_file_num(1000),

0 commit comments

Comments
 (0)