Skip to content

Commit 718488a

Browse files
committed
Add BloomFilter to PlainTableIterator::Seek()
Summary: This patch adds a simple bloom filter in PlainTableIterator::Seek() Test Plan: N/A Reviewers: CC: Task ID: # Blame Rev:
1 parent b135d01 commit 718488a

6 files changed

+71
-16
lines changed

db/plain_table_db_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class PlainTableDBTest {
5959
// Return the current option configuration.
6060
Options CurrentOptions() {
6161
Options options;
62-
options.table_factory.reset(new PlainTableFactory(16, 8));
62+
options.table_factory.reset(new PlainTableFactory(16, 8, 2, 0.8));
6363
options.allow_mmap_reads = true;
6464
return options;
6565
}

include/rocksdb/plain_table_factory.h

+11-2
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,15 @@ class PlainTableFactory: public TableFactory {
4747
public:
4848
~PlainTableFactory() {
4949
}
50-
PlainTableFactory(int user_key_size, int key_prefix_len) :
51-
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len) {
50+
// user_key_size is the length of the user key. key_prefix_len is the
51+
// length of the prefix used for in-memory indexes. bloom_num_bits is
52+
// the number of bits used for the bloom filter per key. hash_table_ratio is
53+
// the desired utilization of the hash table used for prefix hashing.
54+
// hash_table_ratio = number of prefixes / #buckets in the hash table
55+
PlainTableFactory(int user_key_size, int key_prefix_len,
56+
int bloom_num_bits = 0, double hash_table_ratio = 0.75) :
57+
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
58+
bloom_num_bits_(bloom_num_bits), hash_table_ratio_(hash_table_ratio) {
5259
}
5360
const char* Name() const override {
5461
return "PlainTable";
@@ -64,6 +71,8 @@ class PlainTableFactory: public TableFactory {
6471
private:
6572
int user_key_size_;
6673
int key_prefix_len_;
74+
int bloom_num_bits_;
75+
double hash_table_ratio_;
6776
};
6877

6978
} // namespace rocksdb

table/plain_table_factory.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ Status PlainTableFactory::GetTableReader(const Options& options,
1919
unique_ptr<TableReader>* table)
2020
const {
2121
return PlainTableReader::Open(options, soptions, std::move(file), file_size,
22-
table, user_key_size_, key_prefix_len_);
22+
table, user_key_size_, key_prefix_len_,
23+
bloom_num_bits_, hash_table_ratio_);
2324
}
2425

2526
TableBuilder* PlainTableFactory::GetTableBuilder(
2627
const Options& options, WritableFile* file,
2728
CompressionType compression_type) const {
28-
return new PlainTableBuilder(options, file, user_key_size_,
29-
key_prefix_len_);
29+
return new PlainTableBuilder(options, file, user_key_size_, key_prefix_len_);
3030
}
3131
} // namespace rocksdb

table/plain_table_reader.cc

+46-7
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,26 @@ namespace rocksdb {
4040

4141
PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
4242
uint64_t file_size, int user_key_size,
43-
int key_prefix_len) :
44-
soptions_(storage_options), file_size_(file_size),
45-
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len) {
43+
int key_prefix_len, int bloom_bits_per_key,
44+
double hash_table_ratio) :
45+
hash_table_size_(0), soptions_(storage_options), file_size_(file_size),
46+
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
47+
hash_table_ratio_(hash_table_ratio) {
48+
if (bloom_bits_per_key > 0) {
49+
filter_policy_ = NewBloomFilterPolicy(bloom_bits_per_key);
50+
} else {
51+
filter_policy_ = nullptr;
52+
}
4653
hash_table_ = nullptr;
4754
}
4855

4956
PlainTableReader::~PlainTableReader() {
5057
if (hash_table_ != nullptr) {
5158
delete[] hash_table_;
5259
}
60+
if (filter_policy_ != nullptr) {
61+
delete filter_policy_;
62+
}
5363
}
5464

5565
Status PlainTableReader::Open(const Options& options,
@@ -58,12 +68,16 @@ Status PlainTableReader::Open(const Options& options,
5868
uint64_t file_size,
5969
unique_ptr<TableReader>* table_reader,
6070
const int user_key_size,
61-
const int key_prefix_len) {
71+
const int key_prefix_len,
72+
const int bloom_num_bits,
73+
double hash_table_ratio) {
6274
assert(options.allow_mmap_reads);
6375

6476
PlainTableReader* t = new PlainTableReader(soptions, file_size,
6577
user_key_size,
66-
key_prefix_len);
78+
key_prefix_len,
79+
bloom_num_bits,
80+
hash_table_ratio);
6781
t->file_ = std::move(file);
6882
t->options_ = options;
6983
Status s = t->PopulateIndex(file_size);
@@ -146,14 +160,25 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
146160
delete[] hash_table_;
147161
}
148162
// Make the hash table 3/5 full
149-
hash_table_size_ = tmp_index.size() * 1.66;
163+
std::vector<Slice> filter_entries(0); // for creating bloom filter;
164+
if (filter_policy_ != nullptr) {
165+
filter_entries.resize(tmp_index.size());
166+
}
167+
double hash_table_size_multipier =
168+
(hash_table_ratio_ < 1.0) ? 1.0 : 1.0 / hash_table_ratio_;
169+
hash_table_size_ = tmp_index.size() * hash_table_size_multipier + 1;
150170
hash_table_ = new char[GetHashTableRecordLen() * hash_table_size_];
151171
for (int i = 0; i < hash_table_size_; i++) {
152172
memcpy(GetHashTableBucketPtr(i) + key_prefix_len_, &file_size_,
153173
kOffsetLen);
154174
}
155175

176+
size_t count = 0;
156177
for (auto it = tmp_index.begin(); it != tmp_index.end(); ++it) {
178+
if (filter_policy_ != nullptr) {
179+
filter_entries[count++] = it->first;
180+
}
181+
157182
int bucket = GetHashTableBucket(it->first);
158183
uint64_t* hash_value;
159184
while (true) {
@@ -168,6 +193,10 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
168193
memcpy(bucket_ptr, it->first.data(), key_prefix_len_);
169194
memcpy(bucket_ptr + key_prefix_len_, &it->second, kOffsetLen);
170195
}
196+
if (filter_policy_ != nullptr) {
197+
filter_policy_->CreateFilter(&filter_entries[0], count, &filter_str_);
198+
filter_slice_ = Slice(filter_str_.data(), filter_str_.size());
199+
}
171200

172201
Log(options_.info_log, "Number of prefixes: %d, suffix_map length %ld",
173202
hash_table_size_, sub_index_.length());
@@ -187,7 +216,6 @@ inline void PlainTableReader::GetHashValue(int bucket, uint64_t** ret_value) {
187216

188217
Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
189218
Status s;
190-
191219
int bucket = GetHashTableBucket(target);
192220
uint64_t* found_value;
193221
Slice hash_key;
@@ -248,6 +276,12 @@ Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
248276
return s;
249277
}
250278

279+
bool PlainTableReader::MayHavePrefix(const Slice& target_prefix) {
280+
return filter_policy_ == nullptr
281+
|| filter_policy_->KeyMayMatch(target_prefix, filter_slice_);
282+
}
283+
284+
251285
uint64_t PlainTableReader::Next(uint64_t offset, Slice* key, Slice* value,
252286
Slice* tmp_slice) {
253287
if (offset >= file_size_) {
@@ -321,6 +355,11 @@ void PlainTableIterator::SeekToLast() {
321355
}
322356

323357
void PlainTableIterator::Seek(const Slice& target) {
358+
if (!table_->MayHavePrefix(Slice(target.data(), table_->key_prefix_len_))) {
359+
offset_ = next_offset_ = table_->file_size_;
360+
return;
361+
}
362+
324363
Status s = table_->GetOffset(target, &next_offset_);
325364
if (!s.ok()) {
326365
status_ = s;

table/plain_table_reader.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ class PlainTableReader: public TableReader {
5757
static Status Open(const Options& options, const EnvOptions& soptions,
5858
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
5959
unique_ptr<TableReader>* table, const int user_key_size,
60-
const int key_prefix_len);
60+
const int key_prefix_len, const int bloom_num_bits,
61+
double hash_table_ratio);
6162

6263
bool PrefixMayMatch(const Slice& internal_prefix);
6364

@@ -79,7 +80,8 @@ class PlainTableReader: public TableReader {
7980
}
8081

8182
PlainTableReader(const EnvOptions& storage_options, uint64_t file_size,
82-
int user_key_size, int key_prefix_len);
83+
int user_key_size, int key_prefix_len, int bloom_num_bits,
84+
double hash_table_ratio);
8385
~PlainTableReader();
8486

8587
private:
@@ -95,6 +97,10 @@ class PlainTableReader: public TableReader {
9597
uint64_t file_size_;
9698
const size_t user_key_size_;
9799
const size_t key_prefix_len_;
100+
const double hash_table_ratio_;
101+
const FilterPolicy* filter_policy_;
102+
std::string filter_str_;
103+
Slice filter_slice_;
98104

99105
TableProperties tbl_props;
100106

@@ -123,6 +129,7 @@ class PlainTableReader: public TableReader {
123129
Status PopulateIndex(uint64_t file_size);
124130
uint64_t Next(uint64_t offset, Slice* key, Slice* value, Slice* tmp_slice);
125131
Status GetOffset(const Slice& target, uint64_t* offset);
132+
bool MayHavePrefix(const Slice& target_prefix);
126133

127134
// No copying allowed
128135
explicit PlainTableReader(const TableReader&) = delete;

table/table_reader_bench.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ int main(int argc, char** argv) {
242242
if (FLAGS_plain_table) {
243243
options.allow_mmap_reads = true;
244244
env_options.use_mmap_reads = true;
245-
tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len);
245+
tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len, FLAGS_prefix_len);
246246
} else {
247247
tf = new rocksdb::BlockBasedTableFactory();
248248
}

0 commit comments

Comments
 (0)