Skip to content

Commit a4a4a2d

Browse files
ajkrfacebook-github-bot
authored andcommitted
dedup ReadOptions in iterator hierarchy (facebook#7210)
Summary: Previously, a `ReadOptions` object was stored in every `BlockBasedTableIterator` and every `LevelIterator`. This redundancy consumes extra memory, resulting in the `Arena` making more allocations, and iteration observing worse cache performance. This PR migrates callers of `NewInternalIterator()` and `MakeInputIterator()` to provide a `ReadOptions` object guaranteed to outlive the returned iterator. When the iterator's lifetime will be managed by the user, this lifetime guarantee is achieved by storing the `ReadOptions` value in `ArenaWrappedDBIter`. Then, sub-iterators of `NewInternalIterator()` and `MakeInputIterator()` can hold a reference-to-const `ReadOptions`. Pull Request resolved: facebook#7210 Test Plan: - `make check` under ASAN and valgrind - benchmark: on a DB with 2 L0 files and 3 L1+ levels, this PR reduced `Arena` allocation 4792 -> 4160 bytes. Reviewed By: anand1976 Differential Revision: D22861323 Pulled By: ajkr fbshipit-source-id: 54aebb3e89c872eeab0f5793b4b6e42878d093ce
1 parent 18efd76 commit a4a4a2d

19 files changed

+155
-99
lines changed

db/arena_wrapped_db_iter.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ void ArenaWrappedDBIter::Init(Env* env, const ReadOptions& read_options,
4545
true, max_sequential_skip_in_iteration,
4646
read_callback, db_impl, cfd, allow_blob);
4747
sv_number_ = version_number;
48+
read_options_ = read_options;
4849
allow_refresh_ = allow_refresh;
4950
}
5051

@@ -98,8 +99,7 @@ ArenaWrappedDBIter* NewArenaWrappedDbIterator(
9899
max_sequential_skip_in_iterations, version_number, read_callback,
99100
db_impl, cfd, allow_blob, allow_refresh);
100101
if (db_impl != nullptr && cfd != nullptr && allow_refresh) {
101-
iter->StoreRefreshInfo(read_options, db_impl, cfd, read_callback,
102-
allow_blob);
102+
iter->StoreRefreshInfo(db_impl, cfd, read_callback, allow_blob);
103103
}
104104

105105
return iter;

db/arena_wrapped_db_iter.h

+3-4
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class ArenaWrappedDBIter : public Iterator {
4141
virtual ReadRangeDelAggregator* GetRangeDelAggregator() {
4242
return db_iter_->GetRangeDelAggregator();
4343
}
44+
const ReadOptions& GetReadOptions() { return read_options_; }
4445

4546
// Set the internal iterator wrapped inside the DB Iterator. Usually it is
4647
// a merging iterator.
@@ -79,10 +80,8 @@ class ArenaWrappedDBIter : public Iterator {
7980

8081
// Store some parameters so we can refresh the iterator at a later point
8182
// with these same params
82-
void StoreRefreshInfo(const ReadOptions& read_options, DBImpl* db_impl,
83-
ColumnFamilyData* cfd, ReadCallback* read_callback,
84-
bool allow_blob) {
85-
read_options_ = read_options;
83+
void StoreRefreshInfo(DBImpl* db_impl, ColumnFamilyData* cfd,
84+
ReadCallback* read_callback, bool allow_blob) {
8685
db_impl_ = db_impl;
8786
cfd_ = cfd;
8887
read_callback_ = read_callback;

db/builder.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,9 @@ Status BuildTable(
247247
// No matter whether use_direct_io_for_flush_and_compaction is true,
248248
// we will regrad this verification as user reads since the goal is
249249
// to cache it here for further user reads
250+
ReadOptions read_options;
250251
std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
251-
ReadOptions(), file_options, internal_comparator, *meta,
252+
read_options, file_options, internal_comparator, *meta,
252253
nullptr /* range_del_agg */,
253254
mutable_cf_options.prefix_extractor.get(), nullptr,
254255
(internal_stats == nullptr) ? nullptr

db/compaction/compaction_job.cc

+13-3
Original file line numberDiff line numberDiff line change
@@ -676,8 +676,9 @@ Status CompactionJob::Run() {
676676
// No matter whether use_direct_io_for_flush_and_compaction is true,
677677
// we will regard this verification as user reads since the goal is
678678
// to cache it here for further user reads
679+
ReadOptions read_options;
679680
InternalIterator* iter = cfd->table_cache()->NewIterator(
680-
ReadOptions(), file_options_, cfd->internal_comparator(),
681+
read_options, file_options_, cfd->internal_comparator(),
681682
files_output[file_idx]->meta, /*range_del_agg=*/nullptr,
682683
prefix_extractor,
683684
/*table_reader_ptr=*/nullptr,
@@ -877,11 +878,20 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
877878

878879
CompactionRangeDelAggregator range_del_agg(&cfd->internal_comparator(),
879880
existing_snapshots_);
881+
ReadOptions read_options;
882+
read_options.verify_checksums = true;
883+
read_options.fill_cache = false;
884+
// Compaction iterators shouldn't be confined to a single prefix.
885+
// Compactions use Seek() for
886+
// (a) concurrent compactions,
887+
// (b) CompactionFilter::Decision::kRemoveAndSkipUntil.
888+
read_options.total_order_seek = true;
880889

881890
// Although the v2 aggregator is what the level iterator(s) know about,
882891
// the AddTombstones calls will be propagated down to the v1 aggregator.
883-
std::unique_ptr<InternalIterator> input(versions_->MakeInputIterator(
884-
sub_compact->compaction, &range_del_agg, file_options_for_read_));
892+
std::unique_ptr<InternalIterator> input(
893+
versions_->MakeInputIterator(read_options, sub_compact->compaction,
894+
&range_del_agg, file_options_for_read_));
885895

886896
AutoThreadOperationStageUpdater stage_updater(
887897
ThreadStatus::STAGE_COMPACTION_PROCESS_KV);

db/db_basic_test.cc

+25-8
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,35 @@ TEST_F(DBBasicTest, ReadOnlyDB) {
101101
ASSERT_OK(Put("foo", "v3"));
102102
Close();
103103

104+
auto verify_one_iter = [&](Iterator* iter) {
105+
int count = 0;
106+
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
107+
ASSERT_OK(iter->status());
108+
++count;
109+
}
110+
// Always expect two keys: "foo" and "bar"
111+
ASSERT_EQ(count, 2);
112+
};
113+
114+
auto verify_all_iters = [&]() {
115+
Iterator* iter = db_->NewIterator(ReadOptions());
116+
verify_one_iter(iter);
117+
delete iter;
118+
119+
std::vector<Iterator*> iters;
120+
ASSERT_OK(db_->NewIterators(ReadOptions(),
121+
{dbfull()->DefaultColumnFamily()}, &iters));
122+
ASSERT_EQ(static_cast<uint64_t>(1), iters.size());
123+
verify_one_iter(iters[0]);
124+
delete iters[0];
125+
};
126+
104127
auto options = CurrentOptions();
105128
assert(options.env == env_);
106129
ASSERT_OK(ReadOnlyReopen(options));
107130
ASSERT_EQ("v3", Get("foo"));
108131
ASSERT_EQ("v2", Get("bar"));
109-
Iterator* iter = db_->NewIterator(ReadOptions());
110-
int count = 0;
111-
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
112-
ASSERT_OK(iter->status());
113-
++count;
114-
}
115-
ASSERT_EQ(count, 2);
116-
delete iter;
132+
verify_all_iters();
117133
Close();
118134

119135
// Reopen and flush memtable.
@@ -124,6 +140,7 @@ TEST_F(DBBasicTest, ReadOnlyDB) {
124140
ASSERT_OK(ReadOnlyReopen(options));
125141
ASSERT_EQ("v3", Get("foo"));
126142
ASSERT_EQ("v2", Get("bar"));
143+
verify_all_iters();
127144
ASSERT_TRUE(db_->SyncWAL().IsNotSupported());
128145
}
129146

db/db_compaction_filter_test.cc

+6-3
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,9 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) {
336336
InternalKeyComparator icmp(options.comparator);
337337
ReadRangeDelAggregator range_del_agg(&icmp,
338338
kMaxSequenceNumber /* upper_bound */);
339+
ReadOptions read_options;
339340
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
340-
&arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
341+
read_options, &arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
341342
iter->SeekToFirst();
342343
ASSERT_OK(iter->status());
343344
while (iter->Valid()) {
@@ -426,8 +427,9 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) {
426427
InternalKeyComparator icmp(options.comparator);
427428
ReadRangeDelAggregator range_del_agg(&icmp,
428429
kMaxSequenceNumber /* upper_bound */);
430+
ReadOptions read_options;
429431
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
430-
&arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
432+
read_options, &arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
431433
iter->SeekToFirst();
432434
ASSERT_OK(iter->status());
433435
while (iter->Valid()) {
@@ -644,8 +646,9 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) {
644646
InternalKeyComparator icmp(options.comparator);
645647
ReadRangeDelAggregator range_del_agg(&icmp,
646648
kMaxSequenceNumber /* snapshots */);
649+
ReadOptions read_options;
647650
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
648-
&arena, &range_del_agg, kMaxSequenceNumber));
651+
read_options, &arena, &range_del_agg, kMaxSequenceNumber));
649652
iter->SeekToFirst();
650653
ASSERT_OK(iter->status());
651654
while (iter->Valid()) {

db/db_impl/db_impl.cc

+12-10
Original file line numberDiff line numberDiff line change
@@ -1364,9 +1364,12 @@ bool DBImpl::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) {
13641364
}
13651365
}
13661366

1367-
InternalIterator* DBImpl::NewInternalIterator(
1368-
Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence,
1369-
ColumnFamilyHandle* column_family, bool allow_unprepared_value) {
1367+
InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
1368+
Arena* arena,
1369+
RangeDelAggregator* range_del_agg,
1370+
SequenceNumber sequence,
1371+
ColumnFamilyHandle* column_family,
1372+
bool allow_unprepared_value) {
13701373
ColumnFamilyData* cfd;
13711374
if (column_family == nullptr) {
13721375
cfd = default_cf_handle_->cfd();
@@ -1378,9 +1381,8 @@ InternalIterator* DBImpl::NewInternalIterator(
13781381
mutex_.Lock();
13791382
SuperVersion* super_version = cfd->GetSuperVersion()->Ref();
13801383
mutex_.Unlock();
1381-
ReadOptions roptions;
1382-
return NewInternalIterator(roptions, cfd, super_version, arena, range_del_agg,
1383-
sequence, allow_unprepared_value);
1384+
return NewInternalIterator(read_options, cfd, super_version, arena,
1385+
range_del_agg, sequence, allow_unprepared_value);
13841386
}
13851387

13861388
void DBImpl::SchedulePurge() {
@@ -2829,10 +2831,10 @@ ArenaWrappedDBIter* DBImpl::NewIteratorImpl(const ReadOptions& read_options,
28292831
sv->version_number, read_callback, this, cfd, allow_blob,
28302832
read_options.snapshot != nullptr ? false : allow_refresh);
28312833

2832-
InternalIterator* internal_iter =
2833-
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(),
2834-
db_iter->GetRangeDelAggregator(), snapshot,
2835-
/* allow_unprepared_value */ true);
2834+
InternalIterator* internal_iter = NewInternalIterator(
2835+
db_iter->GetReadOptions(), cfd, sv, db_iter->GetArena(),
2836+
db_iter->GetRangeDelAggregator(), snapshot,
2837+
/* allow_unprepared_value */ true);
28362838
db_iter->SetIterUnderDBIter(internal_iter);
28372839

28382840
return db_iter;

db/db_impl/db_impl.h

+11-5
Original file line numberDiff line numberDiff line change
@@ -594,8 +594,10 @@ class DBImpl : public DB {
594594
// the value and so will require PrepareValue() to be called before value();
595595
// allow_unprepared_value = false is convenient when this optimization is not
596596
// useful, e.g. when reading the whole column family.
597+
// @param read_options Must outlive the returned iterator.
597598
InternalIterator* NewInternalIterator(
598-
Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence,
599+
const ReadOptions& read_options, Arena* arena,
600+
RangeDelAggregator* range_del_agg, SequenceNumber sequence,
599601
ColumnFamilyHandle* column_family = nullptr,
600602
bool allow_unprepared_value = false);
601603

@@ -721,10 +723,14 @@ class DBImpl : public DB {
721723

722724
const WriteController& write_controller() { return write_controller_; }
723725

724-
InternalIterator* NewInternalIterator(
725-
const ReadOptions&, ColumnFamilyData* cfd, SuperVersion* super_version,
726-
Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence,
727-
bool allow_unprepared_value);
726+
// @param read_options Must outlive the returned iterator.
727+
InternalIterator* NewInternalIterator(const ReadOptions& read_options,
728+
ColumnFamilyData* cfd,
729+
SuperVersion* super_version,
730+
Arena* arena,
731+
RangeDelAggregator* range_del_agg,
732+
SequenceNumber sequence,
733+
bool allow_unprepared_value);
728734

729735
// hollow transactions shell used for recovery.
730736
// these will then be passed to TransactionDB so that

db/db_impl/db_impl_readonly.cc

+8-8
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options,
8686
read_seq,
8787
super_version->mutable_cf_options.max_sequential_skip_in_iterations,
8888
super_version->version_number, read_callback);
89-
auto internal_iter =
90-
NewInternalIterator(read_options, cfd, super_version, db_iter->GetArena(),
91-
db_iter->GetRangeDelAggregator(), read_seq,
92-
/* allow_unprepared_value */ true);
89+
auto internal_iter = NewInternalIterator(
90+
db_iter->GetReadOptions(), cfd, super_version, db_iter->GetArena(),
91+
db_iter->GetRangeDelAggregator(), read_seq,
92+
/* allow_unprepared_value */ true);
9393
db_iter->SetIterUnderDBIter(internal_iter);
9494
return db_iter;
9595
}
@@ -118,10 +118,10 @@ Status DBImplReadOnly::NewIterators(
118118
env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, read_seq,
119119
sv->mutable_cf_options.max_sequential_skip_in_iterations,
120120
sv->version_number, read_callback);
121-
auto* internal_iter =
122-
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(),
123-
db_iter->GetRangeDelAggregator(), read_seq,
124-
/* allow_unprepared_value */ true);
121+
auto* internal_iter = NewInternalIterator(
122+
db_iter->GetReadOptions(), cfd, sv, db_iter->GetArena(),
123+
db_iter->GetRangeDelAggregator(), read_seq,
124+
/* allow_unprepared_value */ true);
125125
db_iter->SetIterUnderDBIter(internal_iter);
126126
iterators->push_back(db_iter);
127127
}

db/db_impl/db_impl_secondary.cc

+4-4
Original file line numberDiff line numberDiff line change
@@ -415,10 +415,10 @@ ArenaWrappedDBIter* DBImplSecondary::NewIteratorImpl(
415415
snapshot,
416416
super_version->mutable_cf_options.max_sequential_skip_in_iterations,
417417
super_version->version_number, read_callback);
418-
auto internal_iter =
419-
NewInternalIterator(read_options, cfd, super_version, db_iter->GetArena(),
420-
db_iter->GetRangeDelAggregator(), snapshot,
421-
/* allow_unprepared_value */ true);
418+
auto internal_iter = NewInternalIterator(
419+
db_iter->GetReadOptions(), cfd, super_version, db_iter->GetArena(),
420+
db_iter->GetRangeDelAggregator(), snapshot,
421+
/* allow_unprepared_value */ true);
422422
db_iter->SetIterUnderDBIter(internal_iter);
423423
return db_iter;
424424
}

db/db_test_util.cc

+9-6
Original file line numberDiff line numberDiff line change
@@ -907,12 +907,13 @@ std::string DBTestBase::AllEntriesFor(const Slice& user_key, int cf) {
907907
InternalKeyComparator icmp(options.comparator);
908908
ReadRangeDelAggregator range_del_agg(&icmp,
909909
kMaxSequenceNumber /* upper_bound */);
910+
ReadOptions read_options;
910911
ScopedArenaIterator iter;
911912
if (cf == 0) {
912-
iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg,
913+
iter.set(dbfull()->NewInternalIterator(read_options, &arena, &range_del_agg,
913914
kMaxSequenceNumber));
914915
} else {
915-
iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg,
916+
iter.set(dbfull()->NewInternalIterator(read_options, &arena, &range_del_agg,
916917
kMaxSequenceNumber, handles_[cf]));
917918
}
918919
InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
@@ -1322,12 +1323,13 @@ void DBTestBase::validateNumberOfEntries(int numValues, int cf) {
13221323
kMaxSequenceNumber /* upper_bound */);
13231324
// This should be defined after range_del_agg so that it destructs the
13241325
// assigned iterator before it range_del_agg is already destructed.
1326+
ReadOptions read_options;
13251327
ScopedArenaIterator iter;
13261328
if (cf != 0) {
1327-
iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg,
1329+
iter.set(dbfull()->NewInternalIterator(read_options, &arena, &range_del_agg,
13281330
kMaxSequenceNumber, handles_[cf]));
13291331
} else {
1330-
iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg,
1332+
iter.set(dbfull()->NewInternalIterator(read_options, &arena, &range_del_agg,
13311333
kMaxSequenceNumber));
13321334
}
13331335
iter->SeekToFirst();
@@ -1530,8 +1532,9 @@ void DBTestBase::VerifyDBInternal(
15301532
InternalKeyComparator icmp(last_options_.comparator);
15311533
ReadRangeDelAggregator range_del_agg(&icmp,
15321534
kMaxSequenceNumber /* upper_bound */);
1533-
auto iter =
1534-
dbfull()->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber);
1535+
ReadOptions read_options;
1536+
auto iter = dbfull()->NewInternalIterator(read_options, &arena,
1537+
&range_del_agg, kMaxSequenceNumber);
15351538
iter->SeekToFirst();
15361539
for (auto p : true_data) {
15371540
ASSERT_TRUE(iter->Valid());

db/table_cache.h

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ class TableCache {
6060
// the returned iterator. The returned "*table_reader_ptr" object is owned
6161
// by the cache and should not be deleted, and is valid for as long as the
6262
// returned iterator is live.
63+
// @param options Must outlive the returned iterator.
6364
// @param range_del_agg If non-nullptr, adds range deletions to the
6465
// aggregator. If an error occurs, returns it in a NewErrorInternalIterator
6566
// @param for_compaction If true, a new TableReader may be allocated (but

db/version_set.cc

+4-11
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,7 @@ namespace {
876876

877877
class LevelIterator final : public InternalIterator {
878878
public:
879+
// @param read_options Must outlive this iterator.
879880
LevelIterator(TableCache* table_cache, const ReadOptions& read_options,
880881
const FileOptions& file_options,
881882
const InternalKeyComparator& icomparator,
@@ -1020,7 +1021,7 @@ class LevelIterator final : public InternalIterator {
10201021
}
10211022

10221023
TableCache* table_cache_;
1023-
const ReadOptions read_options_;
1024+
const ReadOptions& read_options_;
10241025
const FileOptions& file_options_;
10251026
const InternalKeyComparator& icomparator_;
10261027
const UserComparatorWrapper user_comparator_;
@@ -5670,18 +5671,10 @@ void VersionSet::AddLiveFiles(std::vector<uint64_t>* live_table_files,
56705671
}
56715672

56725673
InternalIterator* VersionSet::MakeInputIterator(
5673-
const Compaction* c, RangeDelAggregator* range_del_agg,
5674+
const ReadOptions& read_options, const Compaction* c,
5675+
RangeDelAggregator* range_del_agg,
56745676
const FileOptions& file_options_compactions) {
56755677
auto cfd = c->column_family_data();
5676-
ReadOptions read_options;
5677-
read_options.verify_checksums = true;
5678-
read_options.fill_cache = false;
5679-
// Compaction iterators shouldn't be confined to a single prefix.
5680-
// Compactions use Seek() for
5681-
// (a) concurrent compactions,
5682-
// (b) CompactionFilter::Decision::kRemoveAndSkipUntil.
5683-
read_options.total_order_seek = true;
5684-
56855678
// Level-0 files have to be merged together. For other levels,
56865679
// we will make a concatenating iterator per level.
56875680
// TODO(opt): use concatenating iterator for level-0 if there is no overlap

0 commit comments

Comments
 (0)