Skip to content

Commit 4e6463e

Browse files
committed
[RocksDB][Performance Branch] Make height and branching factor configurable for skiplist implementation
Summary: As the title says. In particular, HashSkipListRepFactory can now specify a relatively small height, which reduces the memory overhead of keeping one skiplist per bucket. Test Plan: run `make check` and test it on leaf4. Reviewers: dhruba, sdong, kailiu. CC: reconnect.grayhat, leveldb. Differential Revision: https://reviews.facebook.net/D14307
1 parent 8aac46d commit 4e6463e

File tree

4 files changed

+64
-25
lines changed

4 files changed

+64
-25
lines changed

db/prefix_test.cc

+4-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ DEFINE_uint64(items_per_prefix, 10, "total number of values per prefix");
2222
DEFINE_int64(write_buffer_size, 1000000000, "");
2323
DEFINE_int64(max_write_buffer_number, 8, "");
2424
DEFINE_int64(min_write_buffer_number_to_merge, 7, "");
25+
DEFINE_int32(skiplist_height, 4, "");
2526

2627
// Path to the database on file system
2728
const std::string kDbName = rocksdb::test::TmpDir() + "/prefix_test";
@@ -111,7 +112,8 @@ class PrefixTest {
111112
options.prefix_extractor = prefix_extractor;
112113
if (FLAGS_use_nolock_version) {
113114
options.memtable_factory.reset(NewHashSkipListRepFactory(
114-
prefix_extractor, FLAGS_bucket_count));
115+
prefix_extractor, FLAGS_bucket_count,
116+
FLAGS_skiplist_height));
115117
} else {
116118
options.memtable_factory =
117119
std::make_shared<rocksdb::PrefixHashRepFactory>(
@@ -152,7 +154,7 @@ TEST(PrefixTest, DynamicPrefixIterator) {
152154
TestKey test_key(prefix, sorted);
153155

154156
Slice key = TestKeyToSlice(test_key);
155-
std::string value = "v" + std::to_string(sorted);
157+
std::string value(40, 0);
156158

157159
ASSERT_OK(db->Put(write_options, key, value));
158160
}

db/skiplist.h

+22-11
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ class SkipList {
4747
// Create a new SkipList object that will use "cmp" for comparing keys,
4848
// and will allocate memory using "*arena". Objects allocated in the arena
4949
// must remain allocated for the lifetime of the skiplist object.
50-
explicit SkipList(Comparator cmp, Arena* arena);
50+
explicit SkipList(Comparator cmp, Arena* arena,
51+
int32_t max_height = 12, int32_t branching_factor = 4);
5152

5253
// Insert key into the list.
5354
// REQUIRES: nothing that compares equal to key is currently in the list.
@@ -101,7 +102,8 @@ class SkipList {
101102
};
102103

103104
private:
104-
enum { kMaxHeight = 12 };
105+
const int32_t kMaxHeight_;
106+
const int32_t kBranching_;
105107

106108
// Immutable after construction
107109
Comparator const compare_;
@@ -114,8 +116,8 @@ class SkipList {
114116
port::AtomicPointer max_height_; // Height of the entire list
115117

116118
// Used for optimizing sequential insert patterns
117-
Node* prev_[kMaxHeight];
118-
int prev_height_;
119+
Node** prev_;
120+
int32_t prev_height_;
119121

120122
inline int GetMaxHeight() const {
121123
return static_cast<int>(
@@ -257,13 +259,12 @@ inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
257259
template<typename Key, class Comparator>
258260
int SkipList<Key,Comparator>::RandomHeight() {
259261
// Increase height with probability 1 in kBranching_
260-
static const unsigned int kBranching = 4;
261262
int height = 1;
262-
while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
263+
while (height < kMaxHeight_ && ((rnd_.Next() % kBranching_) == 0)) {
263264
height++;
264265
}
265266
assert(height > 0);
266-
assert(height <= kMaxHeight);
267+
assert(height <= kMaxHeight_);
267268
return height;
268269
}
269270

@@ -353,14 +354,24 @@ typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
353354
}
354355

355356
template<typename Key, class Comparator>
356-
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
357-
: compare_(cmp),
357+
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena,
358+
int32_t max_height,
359+
int32_t branching_factor)
360+
: kMaxHeight_(max_height),
361+
kBranching_(branching_factor),
362+
compare_(cmp),
358363
arena_(arena),
359-
head_(NewNode(0 /* any key will do */, kMaxHeight)),
364+
head_(NewNode(0 /* any key will do */, max_height)),
360365
max_height_(reinterpret_cast<void*>(1)),
361366
prev_height_(1),
362367
rnd_(0xdeadbeef) {
363-
for (int i = 0; i < kMaxHeight; i++) {
368+
assert(kMaxHeight_ > 0);
369+
assert(kBranching_ > 0);
370+
// Allocate the prev_ Node* array, directly from the passed-in arena.
371+
// prev_ does not need to be freed, as its life cycle is tied up with
372+
// the arena as a whole.
373+
prev_ = (Node**) arena_->AllocateAligned(sizeof(Node*) * kMaxHeight_);
374+
for (int i = 0; i < kMaxHeight_; i++) {
364375
head_->SetNext(i, nullptr);
365376
prev_[i] = head_;
366377
}

include/rocksdb/memtablerep.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,16 @@ class PrefixHashRepFactory : public TransformRepFactory {
267267

268268
// The same as TransformRepFactory except it doesn't use locks.
269269
// Experimental, will replace TransformRepFactory once we are sure
270-
// it performs better
270+
// it performs better. It contains a fixed array of buckets, each
271+
// pointing to a skiplist (null if the bucket is empty).
272+
// bucket_count: number of fixed array buckets
273+
// skiplist_height: the max height of the skiplist
274+
// skiplist_branching_factor: probabilistic size ratio between adjacent
275+
// linked lists in the skiplist
271276
extern MemTableRepFactory* NewHashSkipListRepFactory(
272-
const SliceTransform* transform, size_t bucket_count = 1000000);
277+
const SliceTransform* transform, size_t bucket_count = 1000000,
278+
int32_t skiplist_height = 4, int32_t skiplist_branching_factor = 4
279+
);
273280

274281
}
275282

util/hash_skiplist_rep.cc

+29-10
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ namespace {
2020
class HashSkipListRep : public MemTableRep {
2121
public:
2222
HashSkipListRep(MemTableRep::KeyComparator& compare, Arena* arena,
23-
const SliceTransform* transform, size_t bucket_size);
23+
const SliceTransform* transform, size_t bucket_size,
24+
int32_t skiplist_height, int32_t skiplist_branching_factor);
2425

2526
virtual void Insert(const char* key) override;
2627

@@ -47,6 +48,9 @@ class HashSkipListRep : public MemTableRep {
4748

4849
size_t bucket_size_;
4950

51+
const int32_t skiplist_height_;
52+
const int32_t skiplist_branching_factor_;
53+
5054
// Maps slices (which are transformed user keys) to buckets of keys sharing
5155
// the same transform.
5256
port::AtomicPointer* buckets_;
@@ -215,8 +219,12 @@ class HashSkipListRep : public MemTableRep {
215219
};
216220

217221
HashSkipListRep::HashSkipListRep(MemTableRep::KeyComparator& compare,
218-
Arena* arena, const SliceTransform* transform, size_t bucket_size)
222+
Arena* arena, const SliceTransform* transform,
223+
size_t bucket_size, int32_t skiplist_height,
224+
int32_t skiplist_branching_factor)
219225
: bucket_size_(bucket_size),
226+
skiplist_height_(skiplist_height),
227+
skiplist_branching_factor_(skiplist_branching_factor),
220228
transform_(transform),
221229
compare_(compare),
222230
arena_(arena),
@@ -239,7 +247,8 @@ HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket(
239247
auto bucket = GetBucket(hash);
240248
if (bucket == nullptr) {
241249
auto addr = arena_->AllocateAligned(sizeof(Bucket));
242-
bucket = new (addr) Bucket(compare_, arena_);
250+
bucket = new (addr) Bucket(compare_, arena_, skiplist_height_,
251+
skiplist_branching_factor_);
243252
buckets_[hash].Release_Store(static_cast<void*>(bucket));
244253
}
245254
return bucket;
@@ -302,17 +311,23 @@ std::shared_ptr<MemTableRep::Iterator>
302311

303312
class HashSkipListRepFactory : public MemTableRepFactory {
304313
public:
305-
explicit HashSkipListRepFactory(const SliceTransform* transform,
306-
size_t bucket_count = 1000000)
307-
: transform_(transform),
308-
bucket_count_(bucket_count) { }
314+
explicit HashSkipListRepFactory(
315+
const SliceTransform* transform,
316+
size_t bucket_count,
317+
int32_t skiplist_height,
318+
int32_t skiplist_branching_factor)
319+
: transform_(transform),
320+
bucket_count_(bucket_count),
321+
skiplist_height_(skiplist_height),
322+
skiplist_branching_factor_(skiplist_branching_factor) { }
309323

310324
virtual ~HashSkipListRepFactory() { delete transform_; }
311325

312326
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
313327
MemTableRep::KeyComparator& compare, Arena* arena) override {
314328
return std::make_shared<HashSkipListRep>(compare, arena, transform_,
315-
bucket_count_);
329+
bucket_count_, skiplist_height_,
330+
skiplist_branching_factor_);
316331
}
317332

318333
virtual const char* Name() const override {
@@ -324,11 +339,15 @@ class HashSkipListRepFactory : public MemTableRepFactory {
324339
private:
325340
const SliceTransform* transform_;
326341
const size_t bucket_count_;
342+
const int32_t skiplist_height_;
343+
const int32_t skiplist_branching_factor_;
327344
};
328345

329346
MemTableRepFactory* NewHashSkipListRepFactory(
330-
const SliceTransform* transform, size_t bucket_count) {
331-
return new HashSkipListRepFactory(transform, bucket_count);
347+
const SliceTransform* transform, size_t bucket_count,
348+
int32_t skiplist_height, int32_t skiplist_branching_factor) {
349+
return new HashSkipListRepFactory(transform, bucket_count,
350+
skiplist_height, skiplist_branching_factor);
332351
}
333352

334353
} // namespace rocksdb

0 commit comments

Comments
 (0)