Skip to content

Commit 0af157f

Browse files
author
Feng Zhu
committed
Implement full filter for block based table.
Summary: 1. Make filter_block.h a base class. Derive block_based_filter_block and full_filter_block from it. The former is the traditional filter block. The full_filter_block is newly added. It generates a filter block that contains all the keys in the SST file. 2. When querying a key, the table first checks whether full_filter is available. If not, it goes to the exact data block and checks using the block_based filter. 3. Users can choose to use full_filter or the traditional one (block_based_filter). They are stored in the SST file under different meta index names: "filter.filter_policy" or "full_filter.filter_policy". From the name, the table reader is able to know the filter block type. 4. Some optimizations have been done for full_filter_block, thus it requires a different interface compared to the original one in filter_policy.h. 5. The actual implementation of filter bits coding/decoding is placed in util/bloom_impl.cc. Benchmark: base commit 1d23b5c Command: db_bench --db=/dev/shm/rocksdb --num_levels=6 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --write_buffer_size=134217728 --max_write_buffer_number=2 --target_file_size_base=33554432 --max_bytes_for_level_base=1073741824 --verify_checksum=false --max_background_compactions=4 --use_plain_table=0 --memtablerep=prefix_hash --open_files=-1 --mmap_read=1 --mmap_write=0 --bloom_bits=10 --bloom_locality=1 --memtable_bloom_bits=500000 --compression_type=lz4 --num=393216000 --use_hash_search=1 --block_size=1024 --block_restart_interval=16 --use_existing_db=1 --threads=1 --benchmarks=readrandom --disable_auto_compactions=1 Read QPS increases by about 30%, from 2230002 to 2991411. Test Plan: make all check valgrind db_test db_stress --use_block_based_filter = 0 ./auto_sanity_test.sh Reviewers: igor, yhchiang, ljin, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D20979
1 parent 9360cc6 commit 0af157f

23 files changed

+1709
-484
lines changed

Makefile

+7-3
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ TESTS = \
9090
blob_store_test \
9191
filelock_test \
9292
filename_test \
93-
filter_block_test \
93+
block_based_filter_block_test \
94+
full_filter_block_test \
9495
histogram_test \
9596
log_test \
9697
manual_compaction_test \
@@ -393,8 +394,11 @@ rate_limiter_test: util/rate_limiter_test.o $(LIBOBJECTS) $(TESTHARNESS)
393394
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
394395
$(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
395396

396-
filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
397-
$(CXX) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
397+
block_based_filter_block_test: table/block_based_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
398+
$(CXX) table/block_based_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
399+
400+
full_filter_block_test: table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
401+
$(CXX) table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
398402

399403
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
400404
$(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)

db/c.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ struct rocksdb_compactionfilter_t : public CompactionFilter {
118118
const Slice& existing_value,
119119
std::string* new_value,
120120
bool* value_changed) const {
121-
char* c_new_value = NULL;
121+
char* c_new_value = nullptr;
122122
size_t new_value_length = 0;
123123
unsigned char c_value_changed = 0;
124124
unsigned char result = (*filter_)(

db/db_bench.cc

+7-5
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ int main() {
3939
#include "rocksdb/memtablerep.h"
4040
#include "rocksdb/write_batch.h"
4141
#include "rocksdb/slice.h"
42+
#include "rocksdb/filter_policy.h"
4243
#include "rocksdb/slice_transform.h"
43-
#include "rocksdb/statistics.h"
4444
#include "rocksdb/perf_context.h"
4545
#include "port/port.h"
4646
#include "port/stack_trace.h"
@@ -553,7 +553,9 @@ DEFINE_double(cuckoo_hash_ratio, 0.9, "Hash ratio for Cuckoo SST table.");
553553
DEFINE_bool(use_hash_search, false, "if use kHashSearch "
554554
"instead of kBinarySearch. "
555555
"This is valid if only we use BlockTable");
556-
556+
DEFINE_bool(use_block_based_filter, false, "if use kBlockBasedFilter "
557+
"instead of kFullFilter for filter block. "
558+
"This is valid if only we use BlockTable");
557559
DEFINE_string(merge_operator, "", "The merge operator to use with the database."
558560
"If a new merge operator is specified, be sure to use fresh"
559561
" database The possible merge operators are defined in"
@@ -1076,9 +1078,9 @@ class Benchmark {
10761078
(FLAGS_cache_numshardbits >= 1 ?
10771079
NewLRUCache(FLAGS_compressed_cache_size, FLAGS_cache_numshardbits) :
10781080
NewLRUCache(FLAGS_compressed_cache_size)) : nullptr),
1079-
filter_policy_(FLAGS_bloom_bits >= 0
1080-
? NewBloomFilterPolicy(FLAGS_bloom_bits)
1081-
: nullptr),
1081+
filter_policy_(FLAGS_bloom_bits >= 0 ?
1082+
NewBloomFilterPolicy(FLAGS_bloom_bits, FLAGS_use_block_based_filter)
1083+
: nullptr),
10821084
prefix_extractor_(NewFixedPrefixTransform(FLAGS_prefix_size)),
10831085
num_(FLAGS_num),
10841086
value_size_(FLAGS_value_size),

db/db_test.cc

+170-54
Original file line numberDiff line numberDiff line change
@@ -324,21 +324,22 @@ class DBTest {
324324
kHashCuckoo = 7,
325325
kMergePut = 8,
326326
kFilter = 9,
327-
kUncompressed = 10,
328-
kNumLevel_3 = 11,
329-
kDBLogDir = 12,
330-
kWalDir = 13,
331-
kManifestFileSize = 14,
332-
kCompactOnFlush = 15,
333-
kPerfOptions = 16,
334-
kDeletesFilterFirst = 17,
335-
kHashSkipList = 18,
336-
kUniversalCompaction = 19,
337-
kCompressedBlockCache = 20,
338-
kInfiniteMaxOpenFiles = 21,
339-
kxxHashChecksum = 22,
340-
kFIFOCompaction = 23,
341-
kEnd = 24
327+
kFullFilter = 10,
328+
kUncompressed = 11,
329+
kNumLevel_3 = 12,
330+
kDBLogDir = 13,
331+
kWalDir = 14,
332+
kManifestFileSize = 15,
333+
kCompactOnFlush = 16,
334+
kPerfOptions = 17,
335+
kDeletesFilterFirst = 18,
336+
kHashSkipList = 19,
337+
kUniversalCompaction = 20,
338+
kCompressedBlockCache = 21,
339+
kInfiniteMaxOpenFiles = 22,
340+
kxxHashChecksum = 23,
341+
kFIFOCompaction = 24,
342+
kEnd = 25
342343
};
343344
int option_config_;
344345

@@ -448,6 +449,30 @@ class DBTest {
448449
}
449450
}
450451

452+
// Switch between different filter policy
453+
// Jump from kDefault to kFilter to kFullFilter
454+
bool ChangeFilterOptions(Options* prev_options = nullptr) {
455+
if (option_config_ == kDefault) {
456+
option_config_ = kFilter;
457+
if (prev_options == nullptr) {
458+
prev_options = &last_options_;
459+
}
460+
Destroy(prev_options);
461+
TryReopen();
462+
return true;
463+
} else if (option_config_ == kFilter) {
464+
option_config_ = kFullFilter;
465+
if (prev_options == nullptr) {
466+
prev_options = &last_options_;
467+
}
468+
Destroy(prev_options);
469+
TryReopen();
470+
return true;
471+
} else {
472+
return false;
473+
}
474+
}
475+
451476
// Return the current option configuration.
452477
Options CurrentOptions(
453478
const anon::OptionsOverride& options_override = anon::OptionsOverride()) {
@@ -486,7 +511,10 @@ class DBTest {
486511
options.merge_operator = MergeOperators::CreatePutOperator();
487512
break;
488513
case kFilter:
489-
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
514+
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
515+
break;
516+
case kFullFilter:
517+
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
490518
break;
491519
case kUncompressed:
492520
options.compression = kNoCompression;
@@ -5744,6 +5772,92 @@ TEST(DBTest, BloomFilter) {
57445772
} while (ChangeCompactOptions());
57455773
}
57465774

5775+
TEST(DBTest, BloomFilterRate) {
5776+
while (ChangeFilterOptions()) {
5777+
Options options = CurrentOptions();
5778+
options.statistics = rocksdb::CreateDBStatistics();
5779+
CreateAndReopenWithCF({"pikachu"}, &options);
5780+
5781+
const int maxKey = 10000;
5782+
for (int i = 0; i < maxKey; i++) {
5783+
ASSERT_OK(Put(1, Key(i), Key(i)));
5784+
}
5785+
// Add a large key to make the file contain wide range
5786+
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
5787+
Flush(1);
5788+
5789+
// Check if they can be found
5790+
for (int i = 0; i < maxKey; i++) {
5791+
ASSERT_EQ(Key(i), Get(1, Key(i)));
5792+
}
5793+
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
5794+
5795+
// Check if filter is useful
5796+
for (int i = 0; i < maxKey; i++) {
5797+
ASSERT_EQ("NOT_FOUND", Get(1, Key(i+33333)));
5798+
}
5799+
ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey*0.98);
5800+
}
5801+
}
5802+
5803+
TEST(DBTest, BloomFilterCompatibility) {
5804+
Options options;
5805+
options.statistics = rocksdb::CreateDBStatistics();
5806+
BlockBasedTableOptions table_options;
5807+
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
5808+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
5809+
5810+
// Create with block based filter
5811+
CreateAndReopenWithCF({"pikachu"}, &options);
5812+
5813+
const int maxKey = 10000;
5814+
for (int i = 0; i < maxKey; i++) {
5815+
ASSERT_OK(Put(1, Key(i), Key(i)));
5816+
}
5817+
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
5818+
Flush(1);
5819+
5820+
// Check db with full filter
5821+
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
5822+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
5823+
ReopenWithColumnFamilies({"default", "pikachu"}, &options);
5824+
5825+
// Check if they can be found
5826+
for (int i = 0; i < maxKey; i++) {
5827+
ASSERT_EQ(Key(i), Get(1, Key(i)));
5828+
}
5829+
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
5830+
}
5831+
5832+
TEST(DBTest, BloomFilterReverseCompatibility) {
5833+
Options options;
5834+
options.statistics = rocksdb::CreateDBStatistics();
5835+
BlockBasedTableOptions table_options;
5836+
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
5837+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
5838+
5839+
// Create with full filter
5840+
CreateAndReopenWithCF({"pikachu"}, &options);
5841+
5842+
const int maxKey = 10000;
5843+
for (int i = 0; i < maxKey; i++) {
5844+
ASSERT_OK(Put(1, Key(i), Key(i)));
5845+
}
5846+
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
5847+
Flush(1);
5848+
5849+
// Check db with block_based filter
5850+
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
5851+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
5852+
ReopenWithColumnFamilies({"default", "pikachu"}, &options);
5853+
5854+
// Check if they can be found
5855+
for (int i = 0; i < maxKey; i++) {
5856+
ASSERT_EQ(Key(i), Get(1, Key(i)));
5857+
}
5858+
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
5859+
}
5860+
57475861
TEST(DBTest, SnapshotFiles) {
57485862
do {
57495863
Options options = CurrentOptions();
@@ -7194,47 +7308,49 @@ void PrefixScanInit(DBTest *dbtest) {
71947308
} // namespace
71957309

71967310
TEST(DBTest, PrefixScan) {
7197-
int count;
7198-
Slice prefix;
7199-
Slice key;
7200-
char buf[100];
7201-
Iterator* iter;
7202-
snprintf(buf, sizeof(buf), "03______:");
7203-
prefix = Slice(buf, 8);
7204-
key = Slice(buf, 9);
7205-
// db configs
7206-
env_->count_random_reads_ = true;
7207-
Options options = CurrentOptions();
7208-
options.env = env_;
7209-
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
7210-
options.disable_auto_compactions = true;
7211-
options.max_background_compactions = 2;
7212-
options.create_if_missing = true;
7213-
options.memtable_factory.reset(NewHashSkipListRepFactory(16));
7311+
while (ChangeFilterOptions()) {
7312+
int count;
7313+
Slice prefix;
7314+
Slice key;
7315+
char buf[100];
7316+
Iterator* iter;
7317+
snprintf(buf, sizeof(buf), "03______:");
7318+
prefix = Slice(buf, 8);
7319+
key = Slice(buf, 9);
7320+
// db configs
7321+
env_->count_random_reads_ = true;
7322+
Options options = CurrentOptions();
7323+
options.env = env_;
7324+
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
7325+
options.disable_auto_compactions = true;
7326+
options.max_background_compactions = 2;
7327+
options.create_if_missing = true;
7328+
options.memtable_factory.reset(NewHashSkipListRepFactory(16));
72147329

7215-
BlockBasedTableOptions table_options;
7216-
table_options.no_block_cache = true;
7217-
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
7218-
table_options.whole_key_filtering = false;
7219-
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
7330+
BlockBasedTableOptions table_options;
7331+
table_options.no_block_cache = true;
7332+
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
7333+
table_options.whole_key_filtering = false;
7334+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
72207335

7221-
// 11 RAND I/Os
7222-
DestroyAndReopen(&options);
7223-
PrefixScanInit(this);
7224-
count = 0;
7225-
env_->random_read_counter_.Reset();
7226-
iter = db_->NewIterator(ReadOptions());
7227-
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
7228-
if (! iter->key().starts_with(prefix)) {
7229-
break;
7336+
// 11 RAND I/Os
7337+
DestroyAndReopen(&options);
7338+
PrefixScanInit(this);
7339+
count = 0;
7340+
env_->random_read_counter_.Reset();
7341+
iter = db_->NewIterator(ReadOptions());
7342+
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
7343+
if (! iter->key().starts_with(prefix)) {
7344+
break;
7345+
}
7346+
count++;
72307347
}
7231-
count++;
7232-
}
7233-
ASSERT_OK(iter->status());
7234-
delete iter;
7235-
ASSERT_EQ(count, 2);
7236-
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
7237-
Close();
7348+
ASSERT_OK(iter->status());
7349+
delete iter;
7350+
ASSERT_EQ(count, 2);
7351+
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
7352+
Close();
7353+
} // end of while
72387354
}
72397355

72407356
TEST(DBTest, TailingIteratorSingle) {

0 commit comments

Comments
 (0)