Skip to content

Commit 66dc033

Browse files
committed
Temporarily disable caching index/filter blocks
Summary: Mixing index/filter blocks with data blocks resulted in some known issues. To make sure in next release our users won't be affected, we added a new option in BlockBasedTableFactory::TableOption to conceal this functionality for now. This patch also introduced a BlockBasedTableReader::OpenOptions, which avoids the "infinite" growth of parameters in BlockBasedTableReader::Open(). Test Plan: make check Reviewers: haobo, sdong, igor, dhruba Reviewed By: igor CC: leveldb, tnovak Differential Revision: https://reviews.facebook.net/D15327
1 parent d24961b commit 66dc033

7 files changed

+87
-64
lines changed

db/db_test.cc

+4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "db/filename.h"
1818
#include "db/version_set.h"
1919
#include "db/write_batch_internal.h"
20+
#include "table/block_based_table_factory.h"
2021
#include "rocksdb/cache.h"
2122
#include "rocksdb/compaction_filter.h"
2223
#include "rocksdb/env.h"
@@ -732,6 +733,9 @@ TEST(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
732733
options.filter_policy = filter_policy.get();
733734
options.create_if_missing = true;
734735
options.statistics = rocksdb::CreateDBStatistics();
736+
BlockBasedTableOptions table_options;
737+
table_options.cache_index_and_filter_blocks = true;
738+
options.table_factory.reset(new BlockBasedTableFactory(table_options));
735739
DestroyAndReopen(&options);
736740

737741
ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));

table/block_based_table_factory.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ namespace rocksdb {
2020

2121
Status BlockBasedTableFactory::GetTableReader(
2222
const Options& options, const EnvOptions& soptions,
23-
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
23+
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
2424
unique_ptr<TableReader>* table_reader) const {
25-
return BlockBasedTable::Open(options, soptions, std::move(file), file_size,
26-
table_reader);
25+
return BlockBasedTable::Open(options, soptions, table_options_,
26+
std::move(file), file_size, table_reader);
2727
}
2828

2929
TableBuilder* BlockBasedTableFactory::GetTableBuilder(

table/block_based_table_factory.h

+11-25
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "rocksdb/flush_block_policy.h"
1515
#include "rocksdb/options.h"
1616
#include "rocksdb/table.h"
17+
#include "table/block_based_table_options.h"
1718

1819
namespace rocksdb {
1920

@@ -30,40 +31,25 @@ class BlockBasedTable;
3031
class BlockBasedTableBuilder;
3132

3233
class BlockBasedTableFactory: public TableFactory {
33-
public:
34-
struct TableOptions {
35-
// @flush_block_policy_factory creates the instances of flush block policy.
36-
// which provides a configurable way to determine when to flush a block in
37-
// the block based tables. If not set, table builder will use the default
38-
// block flush policy, which cut blocks by block size (please refer to
39-
// `FlushBlockBySizePolicy`).
40-
std::shared_ptr<FlushBlockPolicyFactory> flush_block_policy_factory;
41-
};
34+
public:
35+
BlockBasedTableFactory() : BlockBasedTableFactory(BlockBasedTableOptions()) {}
36+
explicit BlockBasedTableFactory(const BlockBasedTableOptions& table_options)
37+
: table_options_(table_options) {}
4238

43-
BlockBasedTableFactory() : BlockBasedTableFactory(TableOptions()) { }
44-
BlockBasedTableFactory(const TableOptions& table_options):
45-
table_options_(table_options) {
46-
}
39+
~BlockBasedTableFactory() {}
4740

48-
~BlockBasedTableFactory() {
49-
}
50-
51-
const char* Name() const override {
52-
return "BlockBasedTable";
53-
}
41+
const char* Name() const override { return "BlockBasedTable"; }
5442

5543
Status GetTableReader(const Options& options, const EnvOptions& soptions,
56-
unique_ptr<RandomAccessFile> && file,
57-
uint64_t file_size,
44+
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
5845
unique_ptr<TableReader>* table_reader) const override;
5946

6047
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
61-
CompressionType compression_type) const
62-
override;
48+
CompressionType compression_type)
49+
const override;
6350

6451
private:
65-
TableOptions table_options_;
52+
BlockBasedTableOptions table_options_;
6653
};
6754

68-
6955
} // namespace rocksdb

table/block_based_table_options.h

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
2+
// This source code is licensed under the BSD-style license found in the
3+
// LICENSE file in the root directory of this source tree. An additional grant
4+
// of patent rights can be found in the PATENTS file in the same directory.
5+
6+
#pragma once
7+
#include <memory>
8+
9+
namespace rocksdb {
10+
11+
class FlushBlockPolicyFactory;
12+
13+
struct BlockBasedTableOptions {
14+
// @flush_block_policy_factory creates the instances of flush block policy.
15+
// which provides a configurable way to determine when to flush a block in
16+
// the block based tables. If not set, table builder will use the default
17+
// block flush policy, which cut blocks by block size (please refer to
18+
// `FlushBlockBySizePolicy`).
19+
std::shared_ptr<FlushBlockPolicyFactory> flush_block_policy_factory;
20+
21+
// TODO(kailiu) Temporarily disable this feature by making the default value
22+
// to be false. Also in master branch, this file is non-public so no user
23+
// will be able to change the value of `cache_index_and_filter_blocks`.
24+
//
25+
// Indicating if we'd put index/filter blocks to the block cache.
26+
// If not specified, each "table reader" object will pre-load index/filter
27+
// block during table initialization.
28+
bool cache_index_and_filter_blocks = false;
29+
};
30+
31+
} // namespace rocksdb

table/block_based_table_reader.cc

+26-26
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "util/coding.h"
2727
#include "util/perf_context_imp.h"
2828
#include "util/stop_watch.h"
29+
#include "table/block_based_table_options.h"
2930

3031
namespace rocksdb {
3132

@@ -45,9 +46,9 @@ struct BlockBasedTable::Rep {
4546
Status status;
4647
unique_ptr<RandomAccessFile> file;
4748
char cache_key_prefix[kMaxCacheKeyPrefixSize];
48-
size_t cache_key_prefix_size;
49+
size_t cache_key_prefix_size = 0;
4950
char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize];
50-
size_t compressed_cache_key_prefix_size;
51+
size_t compressed_cache_key_prefix_size = 0;
5152

5253
// Handle to metaindex_block: saved from footer
5354
BlockHandle metaindex_handle;
@@ -220,20 +221,21 @@ Cache::Handle* GetFromBlockCache(
220221

221222
} // end of anonymous namespace
222223

223-
Status BlockBasedTable::Open(const Options& options,
224-
const EnvOptions& soptions,
225-
unique_ptr<RandomAccessFile> && file,
226-
uint64_t size,
224+
Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
225+
const BlockBasedTableOptions& table_options,
226+
unique_ptr<RandomAccessFile>&& file,
227+
uint64_t file_size,
227228
unique_ptr<TableReader>* table_reader) {
228229
table_reader->reset();
229-
if (size < Footer::kEncodedLength) {
230+
231+
if (file_size < Footer::kEncodedLength) {
230232
return Status::InvalidArgument("file is too short to be an sstable");
231233
}
232234

233235
char footer_space[Footer::kEncodedLength];
234236
Slice footer_input;
235-
Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
236-
&footer_input, footer_space);
237+
Status s = file->Read(file_size - Footer::kEncodedLength,
238+
Footer::kEncodedLength, &footer_input, footer_space);
237239
if (!s.ok()) return s;
238240

239241
// Check that we actually read the whole footer from the file. It may be
@@ -277,11 +279,21 @@ Status BlockBasedTable::Open(const Options& options,
277279
}
278280
}
279281

280-
// Initialize index/filter blocks. If block cache is not specified,
281-
// these blocks will be kept in member variables in Rep, which will
282-
// reside in the memory as long as this table object is alive; otherwise
283-
// they will be added to block cache.
284-
if (!options.block_cache) {
282+
// Will use block cache for index/filter blocks access?
283+
if (options.block_cache && table_options.cache_index_and_filter_blocks) {
284+
// Call IndexBlockReader() to implicitly add index to the block_cache
285+
unique_ptr<Iterator> iter(new_table->IndexBlockReader(ReadOptions()));
286+
s = iter->status();
287+
288+
if (s.ok()) {
289+
// Call GetFilter() to implicitly add filter to the block_cache
290+
auto filter_entry = new_table->GetFilter();
291+
filter_entry.Release(options.block_cache.get());
292+
}
293+
} else {
294+
// If we don't use block cache for index/filter blocks access, we'll
295+
// pre-load these blocks, which will kept in member variables in Rep
296+
// and with a same life-time as this table object.
285297
Block* index_block = nullptr;
286298
// TODO: we never really verify check sum for index block
287299
s = ReadBlockFromFile(
@@ -309,18 +321,7 @@ Status BlockBasedTable::Open(const Options& options,
309321
} else {
310322
delete index_block;
311323
}
312-
} else {
313-
// Call IndexBlockReader() to implicitly add index to the block_cache
314-
unique_ptr<Iterator> iter(
315-
new_table->IndexBlockReader(ReadOptions())
316-
);
317-
s = iter->status();
318324

319-
if (s.ok()) {
320-
// Call GetFilter() to implicitly add filter to the block_cache
321-
auto filter_entry = new_table->GetFilter();
322-
filter_entry.Release(options.block_cache.get());
323-
}
324325
}
325326

326327
if (s.ok()) {
@@ -836,7 +837,6 @@ BlockBasedTable::GetFilter(bool no_io) const {
836837
// Get the iterator from the index block.
837838
Iterator* BlockBasedTable::IndexBlockReader(const ReadOptions& options) const {
838839
if (rep_->index_block) {
839-
assert (!rep_->options.block_cache);
840840
return rep_->index_block->NewIterator(rep_->options.comparator);
841841
}
842842

table/block_based_table_reader.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ struct ReadOptions;
2929
class TableCache;
3030
class TableReader;
3131
class FilterBlockReader;
32+
struct BlockBasedTableOptions;
3233

3334
using std::unique_ptr;
3435

@@ -50,10 +51,9 @@ class BlockBasedTable : public TableReader {
5051
// to nullptr and returns a non-ok status.
5152
//
5253
// *file must remain live while this Table is in use.
53-
static Status Open(const Options& options,
54-
const EnvOptions& soptions,
55-
unique_ptr<RandomAccessFile>&& file,
56-
uint64_t file_size,
54+
static Status Open(const Options& db_options, const EnvOptions& env_options,
55+
const BlockBasedTableOptions& table_options,
56+
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
5757
unique_ptr<TableReader>* table_reader);
5858

5959
bool PrefixMayMatch(const Slice& internal_prefix) override;

table/table_test.cc

+8-6
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,12 @@ class BlockConstructor: public Constructor {
243243

244244
class BlockBasedTableConstructor: public Constructor {
245245
public:
246-
explicit BlockBasedTableConstructor(
247-
const Comparator* cmp)
248-
: Constructor(cmp) {
249-
}
246+
explicit BlockBasedTableConstructor(const Comparator* cmp)
247+
: Constructor(cmp) {}
250248
~BlockBasedTableConstructor() {
251249
Reset();
252250
}
251+
253252
virtual Status FinishImpl(const Options& options, const KVMap& data) {
254253
Reset();
255254
sink_.reset(new StringSink());
@@ -277,7 +276,6 @@ class BlockBasedTableConstructor: public Constructor {
277276
// Open the table
278277
uniq_id_ = cur_uniq_id_++;
279278
source_.reset(new StringSource(sink_->contents(), uniq_id_));
280-
unique_ptr<TableFactory> table_factory;
281279
return options.table_factory->GetTableReader(options, soptions,
282280
std::move(source_),
283281
sink_->contents().size(),
@@ -979,6 +977,11 @@ TEST(TableTest, BlockCacheTest) {
979977
options.create_if_missing = true;
980978
options.statistics = CreateDBStatistics();
981979
options.block_cache = NewLRUCache(1024);
980+
981+
// Enable the cache for index/filter blocks
982+
BlockBasedTableOptions table_options;
983+
table_options.cache_index_and_filter_blocks = true;
984+
options.table_factory.reset(new BlockBasedTableFactory(table_options));
982985
std::vector<std::string> keys;
983986
KVMap kvmap;
984987

@@ -1292,7 +1295,6 @@ TEST(MemTableTest, Simple) {
12921295
delete memtable->Unref();
12931296
}
12941297

1295-
12961298
} // namespace rocksdb
12971299

12981300
int main(int argc, char** argv) {

0 commit comments

Comments
 (0)