Skip to content

Commit 9718c79

Browse files
committed
[Performance Branch] Fix a bug of PlainTable when building indexes
Summary: PlainTable now has a bug of the ordering of indexes for the prefixes in the same bucket. I thought std::map guaranteed key order but it didn't, probably because I didn't use it properly. But seems to me that we don't need to make extra sorting as input prefixes are already sorted. Found by problem by running leaf4 against plain table. Replace the map with a vector. It should performs better too. After the fix, leaf4 unit tests are passing. Test Plan: run plain_table_db_test Also going to run db_test with plain table in the uncommitted branch. Reviewers: haobo, kailiu Reviewed By: haobo CC: nkg-, leveldb Differential Revision: https://reviews.facebook.net/D14649
1 parent 0cd1521 commit 9718c79

File tree

1 file changed

+15
-15
lines changed

1 file changed

+15
-15
lines changed

table/plain_table_reader.cc

+15-15
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,6 @@ struct hash<rocksdb::Slice> {
3737
return MurmurHash(s.data(), s.size(), 397);
3838
}
3939
};
40-
41-
class slice_comparator {
42-
public:
43-
bool operator()(rocksdb::Slice const& s1,
44-
rocksdb::Slice const& s2) const {
45-
return s1.compare(s2) < 0;
46-
}
47-
};
4840
}
4941

5042
namespace rocksdb {
@@ -146,8 +138,8 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
146138
HistogramImpl keys_per_prefix_hist;
147139
// Need map to be ordered to make sure sub indexes generated
148140
// are in order.
149-
std::map<Slice, std::string, std::slice_comparator> prefix2map;
150-
141+
std::vector<std::pair<Slice, std::string>> prefix_index_pairs;
142+
std::string current_prefix_index;
151143
while (pos < file_size) {
152144
uint32_t key_offset = pos;
153145
status_ = Next(pos, &key_slice, &value_slice, pos);
@@ -156,34 +148,42 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
156148
if (first || prev_key_prefix_slice != key_prefix_slice) {
157149
if (!first) {
158150
keys_per_prefix_hist.Add(key_index_within_prefix);
151+
prefix_index_pairs.push_back(
152+
std::make_pair<Slice, std::string>(
153+
std::move(prev_key_prefix_slice),
154+
std::move(current_prefix_index)));
155+
current_prefix_index.clear();
159156
}
160157
key_index_within_prefix = 0;
161158
prev_key_prefix_slice = key_prefix_slice;
162159
}
163160

164161
if (key_index_within_prefix++ % 8 == 0) {
165162
// Add an index key for every 8 keys
166-
std::string& prefix_index = prefix2map[key_prefix_slice];
167-
PutFixed32(&prefix_index, key_offset);
163+
PutFixed32(&current_prefix_index, key_offset);
168164
}
169165
first = false;
170166
}
167+
prefix_index_pairs.push_back(
168+
std::make_pair<Slice, std::string>(std::move(prev_key_prefix_slice),
169+
std::move(current_prefix_index)));
170+
171171
keys_per_prefix_hist.Add(key_index_within_prefix);
172172
if (hash_table_ != nullptr) {
173173
delete[] hash_table_;
174174
}
175175
std::vector<Slice> filter_entries(0); // for creating bloom filter;
176176
if (filter_policy_ != nullptr) {
177-
filter_entries.reserve(prefix2map.size());
177+
filter_entries.reserve(prefix_index_pairs.size());
178178
}
179179
double hash_table_size_multipier =
180180
(hash_table_ratio_ > 1.0) ? 1.0 : 1.0 / hash_table_ratio_;
181-
hash_table_size_ = prefix2map.size() * hash_table_size_multipier + 1;
181+
hash_table_size_ = prefix_index_pairs.size() * hash_table_size_multipier + 1;
182182
hash_table_ = new uint32_t[hash_table_size_];
183183
std::vector<std::string> hash2map(hash_table_size_);
184184

185185
size_t sub_index_size_needed = 0;
186-
for (auto& p: prefix2map) {
186+
for (auto& p: prefix_index_pairs) {
187187
auto& sub_index = hash2map[getBucketId(p.first, key_prefix_len_,
188188
hash_table_size_)];
189189
if (sub_index.length() > 0 || p.second.length() > kOffsetLen) {

0 commit comments

Comments
 (0)