Skip to content

Commit 5d25a46

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents dff2b1a + 9b58c73 commit 5d25a46

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+796
-483
lines changed

db/column_family_test.cc

+6
Original file line number | Diff line number | Diff line change
@@ -408,9 +408,15 @@ TEST(ColumnFamilyTest, WriteBatchFailure) {
408408
Open();
409409
CreateColumnFamiliesAndReopen({"one", "two"});
410410
WriteBatch batch;
411+
batch.Put(handles_[0], Slice("existing"), Slice("column-family"));
411412
batch.Put(handles_[1], Slice("non-existing"), Slice("column-family"));
412413
ASSERT_OK(db_->Write(WriteOptions(), &batch));
413414
DropColumnFamilies({1});
415+
WriteOptions woptions_ignore_missing_cf;
416+
woptions_ignore_missing_cf.ignore_missing_column_families = true;
417+
batch.Put(handles_[0], Slice("still here"), Slice("column-family"));
418+
ASSERT_OK(db_->Write(woptions_ignore_missing_cf, &batch));
419+
ASSERT_EQ("column-family", Get(0, "still here"));
414420
Status s = db_->Write(WriteOptions(), &batch);
415421
ASSERT_TRUE(s.IsInvalidArgument());
416422
Close();

db/db_impl.cc

+93 -49
Original file line number | Diff line number | Diff line change
@@ -290,8 +290,10 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
290290
return result;
291291
}
292292

293+
namespace {
294+
293295
Status SanitizeDBOptionsByCFOptions(
294-
DBOptions* db_opts,
296+
const DBOptions* db_opts,
295297
const std::vector<ColumnFamilyDescriptor>& column_families) {
296298
Status s;
297299
for (auto cf : column_families) {
@@ -303,7 +305,6 @@ Status SanitizeDBOptionsByCFOptions(
303305
return Status::OK();
304306
}
305307

306-
namespace {
307308
CompressionType GetCompressionFlush(const Options& options) {
308309
// Compressing memtable flushes might not help unless the sequential load
309310
// optimization is used for leveled compaction. Otherwise the CPU and
@@ -631,7 +632,7 @@ bool CompareCandidateFile(const rocksdb::DBImpl::CandidateFileInfo& first,
631632
} else if (first.file_name < second.file_name) {
632633
return false;
633634
} else {
634-
return (first.path_id > first.path_id);
635+
return (first.path_id > second.path_id);
635636
}
636637
}
637638
}; // namespace
@@ -1301,14 +1302,20 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
13011302
WriteBatch batch;
13021303
while (reader.ReadRecord(&record, &scratch)) {
13031304
if (record.size() < 12) {
1304-
reporter.Corruption(
1305-
record.size(), Status::Corruption("log record too small"));
1305+
reporter.Corruption(record.size(),
1306+
Status::Corruption("log record too small"));
13061307
continue;
13071308
}
13081309
WriteBatchInternal::SetContents(&batch, record);
13091310

1311+
// If column family was not found, it might mean that the WAL write
1312+
// batch references to the column family that was dropped after the
1313+
// insert. We don't want to fail the whole write batch in that case -- we
1314+
// just ignore the update. That's why we set ignore missing column families
1315+
// to true
13101316
status = WriteBatchInternal::InsertInto(
1311-
&batch, column_family_memtables_.get(), true, log_number);
1317+
&batch, column_family_memtables_.get(),
1318+
true /* ignore missing column families */, log_number);
13121319

13131320
MaybeIgnoreError(&status);
13141321
if (!status.ok()) {
@@ -1677,6 +1684,13 @@ Status DBImpl::CompactRange(ColumnFamilyHandle* column_family,
16771684
}
16781685
LogFlush(options_.info_log);
16791686

1687+
{
1688+
MutexLock l(&mutex_);
1689+
// an automatic compaction that has been scheduled might have been
1690+
// preempted by the manual compactions. Need to schedule it back.
1691+
MaybeScheduleFlushOrCompaction();
1692+
}
1693+
16801694
return s;
16811695
}
16821696

@@ -1864,18 +1878,15 @@ Status DBImpl::RunManualCompaction(ColumnFamilyData* cfd, int input_level,
18641878
bg_cv_.Wait();
18651879
} else {
18661880
manual_compaction_ = &manual;
1867-
MaybeScheduleFlushOrCompaction();
1881+
assert(bg_compaction_scheduled_ == 0);
1882+
bg_compaction_scheduled_++;
1883+
env_->Schedule(&DBImpl::BGWorkCompaction, this, Env::Priority::LOW);
18681884
}
18691885
}
18701886

18711887
assert(!manual.in_progress);
18721888
assert(bg_manual_only_ > 0);
18731889
--bg_manual_only_;
1874-
if (bg_manual_only_ == 0) {
1875-
// an automatic compaction should have been scheduled might have be
1876-
// preempted by the manual compactions. Need to schedule it back.
1877-
MaybeScheduleFlushOrCompaction();
1878-
}
18791890
return manual.status;
18801891
}
18811892

@@ -1963,11 +1974,11 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
19631974

19641975
// Schedule BGWorkCompaction if there's a compaction pending (or a memtable
19651976
// flush, but the HIGH pool is not enabled)
1966-
// Do it only if max_background_compactions hasn't been reached and, in case
1967-
// bg_manual_only_ > 0, if it's a manual compaction.
1968-
if ((manual_compaction_ || is_compaction_needed ||
1969-
(is_flush_pending && options_.max_background_flushes == 0)) &&
1970-
(!bg_manual_only_ || manual_compaction_)) {
1977+
// Do it only if max_background_compactions hasn't been reached and
1978+
// bg_manual_only_ == 0
1979+
if (!bg_manual_only_ &&
1980+
(is_compaction_needed ||
1981+
(is_flush_pending && options_.max_background_flushes == 0))) {
19711982
if (bg_compaction_scheduled_ < options_.max_background_compactions) {
19721983
bg_compaction_scheduled_++;
19731984
env_->Schedule(&DBImpl::BGWorkCompaction, this, Env::Priority::LOW);
@@ -1979,7 +1990,7 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
19791990
}
19801991

19811992
void DBImpl::RecordFlushIOStats() {
1982-
RecordTick(stats_, FLUSH_WRITE_BYTES, iostats_context.bytes_written);
1993+
RecordTick(stats_, FLUSH_WRITE_BYTES, IOSTATS(bytes_written));
19831994
IOSTATS_RESET(bytes_written);
19841995
}
19851996

@@ -2194,6 +2205,10 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
21942205
if (is_manual) {
21952206
// another thread cannot pick up the same work
21962207
manual_compaction_->in_progress = true;
2208+
} else if (manual_compaction_ != nullptr) {
2209+
// there should be no automatic compactions running when manual compaction
2210+
// is running
2211+
return Status::OK();
21972212
}
21982213

21992214
// FLUSH preempts compaction
@@ -2313,7 +2328,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
23132328

23142329
if (status.ok()) {
23152330
// Done
2316-
} else if (shutting_down_.Acquire_Load()) {
2331+
} else if (status.IsShutdownInProgress()) {
23172332
// Ignore compaction errors found during shutting down
23182333
} else {
23192334
Log(InfoLogLevel::WARN_LEVEL, options_.info_log, "Compaction error: %s",
@@ -2573,6 +2588,10 @@ inline SequenceNumber DBImpl::findEarliestVisibleSnapshot(
25732588
uint64_t DBImpl::CallFlushDuringCompaction(ColumnFamilyData* cfd,
25742589
DeletionState& deletion_state,
25752590
LogBuffer* log_buffer) {
2591+
if (options_.max_background_flushes > 0) {
2592+
// flush thread will take care of this
2593+
return 0;
2594+
}
25762595
if (cfd->imm()->imm_flush_needed.NoBarrier_Load() != nullptr) {
25772596
const uint64_t imm_start = env_->NowMicros();
25782597
mutex_.Lock();
@@ -2626,9 +2645,29 @@ Status DBImpl::ProcessKeyValueCompaction(
26262645
compaction_filter = compaction_filter_from_factory.get();
26272646
}
26282647

2648+
int64_t key_drop_user = 0;
2649+
int64_t key_drop_newer_entry = 0;
2650+
int64_t key_drop_obsolete = 0;
2651+
int64_t loop_cnt = 0;
26292652
while (input->Valid() && !shutting_down_.Acquire_Load() &&
26302653
!cfd->IsDropped()) {
2631-
RecordCompactionIOStats();
2654+
if (++loop_cnt > 1000) {
2655+
if (key_drop_user > 0) {
2656+
RecordTick(stats_, COMPACTION_KEY_DROP_USER, key_drop_user);
2657+
key_drop_user = 0;
2658+
}
2659+
if (key_drop_newer_entry > 0) {
2660+
RecordTick(stats_, COMPACTION_KEY_DROP_NEWER_ENTRY,
2661+
key_drop_newer_entry);
2662+
key_drop_newer_entry = 0;
2663+
}
2664+
if (key_drop_obsolete > 0) {
2665+
RecordTick(stats_, COMPACTION_KEY_DROP_OBSOLETE, key_drop_obsolete);
2666+
key_drop_obsolete = 0;
2667+
}
2668+
RecordCompactionIOStats();
2669+
loop_cnt = 0;
2670+
}
26322671
// FLUSH preempts compaction
26332672
// TODO(icanadi) this currently only checks if flush is necessary on
26342673
// compacting column family. we should also check if flush is necessary on
@@ -2709,7 +2748,7 @@ Status DBImpl::ProcessKeyValueCompaction(
27092748
ParseInternalKey(key, &ikey);
27102749
// no value associated with delete
27112750
value.clear();
2712-
RecordTick(stats_, COMPACTION_KEY_DROP_USER);
2751+
++key_drop_user;
27132752
} else if (value_changed) {
27142753
value = compaction_filter_value;
27152754
}
@@ -2733,7 +2772,7 @@ Status DBImpl::ProcessKeyValueCompaction(
27332772
// TODO: why not > ?
27342773
assert(last_sequence_for_key >= ikey.sequence);
27352774
drop = true; // (A)
2736-
RecordTick(stats_, COMPACTION_KEY_DROP_NEWER_ENTRY);
2775+
++key_drop_newer_entry;
27372776
} else if (ikey.type == kTypeDeletion &&
27382777
ikey.sequence <= earliest_snapshot &&
27392778
compact->compaction->KeyNotExistsBeyondOutputLevel(ikey.user_key)) {
@@ -2745,7 +2784,7 @@ Status DBImpl::ProcessKeyValueCompaction(
27452784
// few iterations of this loop (by rule (A) above).
27462785
// Therefore this deletion marker is obsolete and can be dropped.
27472786
drop = true;
2748-
RecordTick(stats_, COMPACTION_KEY_DROP_OBSOLETE);
2787+
++key_drop_obsolete;
27492788
} else if (ikey.type == kTypeMerge) {
27502789
if (!merge.HasOperator()) {
27512790
LogToBuffer(log_buffer, "Options::merge_operator is null.");
@@ -2892,7 +2931,15 @@ Status DBImpl::ProcessKeyValueCompaction(
28922931
input->Next();
28932932
}
28942933
}
2895-
2934+
if (key_drop_user > 0) {
2935+
RecordTick(stats_, COMPACTION_KEY_DROP_USER, key_drop_user);
2936+
}
2937+
if (key_drop_newer_entry > 0) {
2938+
RecordTick(stats_, COMPACTION_KEY_DROP_NEWER_ENTRY, key_drop_newer_entry);
2939+
}
2940+
if (key_drop_obsolete > 0) {
2941+
RecordTick(stats_, COMPACTION_KEY_DROP_OBSOLETE, key_drop_obsolete);
2942+
}
28962943
RecordCompactionIOStats();
28972944

28982945
return status;
@@ -3367,7 +3414,7 @@ Status DBImpl::GetImpl(const ReadOptions& options,
33673414
ColumnFamilyHandle* column_family, const Slice& key,
33683415
std::string* value, bool* value_found) {
33693416
StopWatch sw(env_, stats_, DB_GET);
3370-
PERF_TIMER_AUTO(get_snapshot_time);
3417+
PERF_TIMER_GUARD(get_snapshot_time);
33713418

33723419
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
33733420
auto cfd = cfh->cfd();
@@ -3391,27 +3438,27 @@ Status DBImpl::GetImpl(const ReadOptions& options,
33913438
// merge_operands will contain the sequence of merges in the latter case.
33923439
LookupKey lkey(key, snapshot);
33933440
PERF_TIMER_STOP(get_snapshot_time);
3441+
33943442
if (sv->mem->Get(lkey, value, &s, merge_context, *cfd->options())) {
33953443
// Done
33963444
RecordTick(stats_, MEMTABLE_HIT);
33973445
} else if (sv->imm->Get(lkey, value, &s, merge_context, *cfd->options())) {
33983446
// Done
33993447
RecordTick(stats_, MEMTABLE_HIT);
34003448
} else {
3401-
PERF_TIMER_START(get_from_output_files_time);
3402-
3449+
PERF_TIMER_GUARD(get_from_output_files_time);
34033450
sv->current->Get(options, lkey, value, &s, &merge_context, value_found);
3404-
PERF_TIMER_STOP(get_from_output_files_time);
34053451
RecordTick(stats_, MEMTABLE_MISS);
34063452
}
34073453

3408-
PERF_TIMER_START(get_post_process_time);
3454+
{
3455+
PERF_TIMER_GUARD(get_post_process_time);
34093456

3410-
ReturnAndCleanupSuperVersion(cfd, sv);
3457+
ReturnAndCleanupSuperVersion(cfd, sv);
34113458

3412-
RecordTick(stats_, NUMBER_KEYS_READ);
3413-
RecordTick(stats_, BYTES_READ, value->size());
3414-
PERF_TIMER_STOP(get_post_process_time);
3459+
RecordTick(stats_, NUMBER_KEYS_READ);
3460+
RecordTick(stats_, BYTES_READ, value->size());
3461+
}
34153462
return s;
34163463
}
34173464

@@ -3421,7 +3468,7 @@ std::vector<Status> DBImpl::MultiGet(
34213468
const std::vector<Slice>& keys, std::vector<std::string>* values) {
34223469

34233470
StopWatch sw(env_, stats_, DB_MULTIGET);
3424-
PERF_TIMER_AUTO(get_snapshot_time);
3471+
PERF_TIMER_GUARD(get_snapshot_time);
34253472

34263473
SequenceNumber snapshot;
34273474

@@ -3497,7 +3544,7 @@ std::vector<Status> DBImpl::MultiGet(
34973544
}
34983545

34993546
// Post processing (decrement reference counts and record statistics)
3500-
PERF_TIMER_START(get_post_process_time);
3547+
PERF_TIMER_GUARD(get_post_process_time);
35013548
autovector<SuperVersion*> superversions_to_delete;
35023549

35033550
// TODO(icanadi) do we need lock here or just around Cleanup()?
@@ -3870,7 +3917,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
38703917
if (my_batch == nullptr) {
38713918
return Status::Corruption("Batch is nullptr!");
38723919
}
3873-
PERF_TIMER_AUTO(write_pre_and_post_process_time);
3920+
PERF_TIMER_GUARD(write_pre_and_post_process_time);
38743921
Writer w(&mutex_);
38753922
w.batch = my_batch;
38763923
w.sync = options.sync;
@@ -4003,7 +4050,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
40034050

40044051
uint64_t log_size = 0;
40054052
if (!options.disableWAL) {
4006-
PERF_TIMER_START(write_wal_time);
4053+
PERF_TIMER_GUARD(write_wal_time);
40074054
Slice log_entry = WriteBatchInternal::Contents(updates);
40084055
status = log_->AddRecord(log_entry);
40094056
total_log_size_ += log_entry.size();
@@ -4021,13 +4068,13 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
40214068
status = log_->file()->Sync();
40224069
}
40234070
}
4024-
PERF_TIMER_STOP(write_wal_time);
40254071
}
40264072
if (status.ok()) {
4027-
PERF_TIMER_START(write_memtable_time);
4073+
PERF_TIMER_GUARD(write_memtable_time);
40284074

40294075
status = WriteBatchInternal::InsertInto(
4030-
updates, column_family_memtables_.get(), false, 0, this, false);
4076+
updates, column_family_memtables_.get(),
4077+
options.ignore_missing_column_families, 0, this, false);
40314078
// A non-OK status here indicates iteration failure (either in-memory
40324079
// writebatch corruption (very bad), or the client specified invalid
40334080
// column family). This will later on trigger bg_error_.
@@ -4036,8 +4083,6 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
40364083
// into the memtable would result in a state that some write ops might
40374084
// have succeeded in memtable but Status reports error for all writes.
40384085

4039-
PERF_TIMER_STOP(write_memtable_time);
4040-
40414086
SetTickerCount(stats_, SEQUENCE_NUMBER, last_sequence);
40424087
}
40434088
PERF_TIMER_START(write_pre_and_post_process_time);
@@ -4071,7 +4116,6 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
40714116
RecordTick(stats_, WRITE_TIMEDOUT);
40724117
}
40734118

4074-
PERF_TIMER_STOP(write_pre_and_post_process_time);
40754119
return status;
40764120
}
40774121

@@ -4759,11 +4803,7 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
47594803
column_families.push_back(
47604804
ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
47614805
std::vector<ColumnFamilyHandle*> handles;
4762-
Status s = SanitizeDBOptionsByCFOptions(&db_options, column_families);
4763-
if (!s.ok()) {
4764-
return s;
4765-
}
4766-
s = DB::Open(db_options, dbname, column_families, &handles, dbptr);
4806+
Status s = DB::Open(db_options, dbname, column_families, &handles, dbptr);
47674807
if (s.ok()) {
47684808
assert(handles.size() == 1);
47694809
// i can delete the handle since DBImpl is always holding a reference to
@@ -4776,6 +4816,10 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
47764816
Status DB::Open(const DBOptions& db_options, const std::string& dbname,
47774817
const std::vector<ColumnFamilyDescriptor>& column_families,
47784818
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr) {
4819+
Status s = SanitizeDBOptionsByCFOptions(&db_options, column_families);
4820+
if (!s.ok()) {
4821+
return s;
4822+
}
47794823
if (db_options.db_paths.size() > 1) {
47804824
for (auto& cfd : column_families) {
47814825
if (cfd.options.compaction_style != kCompactionStyleUniversal) {
@@ -4801,7 +4845,7 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
48014845
}
48024846

48034847
DBImpl* impl = new DBImpl(db_options, dbname);
4804-
Status s = impl->env_->CreateDirIfMissing(impl->options_.wal_dir);
4848+
s = impl->env_->CreateDirIfMissing(impl->options_.wal_dir);
48054849
if (s.ok()) {
48064850
for (auto db_path : impl->options_.db_paths) {
48074851
s = impl->env_->CreateDirIfMissing(db_path.path);

0 commit comments

Comments
 (0)