Commit 12b6d2b
Separate the aligned and unaligned memory allocation
Summary: Use two vectors for different types of memory allocation.

Test Plan: run all unit tests.

Reviewers: haobo, sdong
Reviewed By: haobo
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15027
1 parent: 50994bf

4 files changed: +152 −97

include/rocksdb/options.h

+36 −31

```diff
@@ -44,28 +44,25 @@ using std::shared_ptr;
 enum CompressionType : char {
   // NOTE: do not change the values of existing entries, as these are
   // part of the persistent format on disk.
-  kNoCompression     = 0x0,
+  kNoCompression = 0x0,
   kSnappyCompression = 0x1,
   kZlibCompression = 0x2,
   kBZip2Compression = 0x3
 };
 
 enum CompactionStyle : char {
-  kCompactionStyleLevel     = 0x0, // level based compaction style
-  kCompactionStyleUniversal = 0x1  // Universal compaction style
+  kCompactionStyleLevel = 0x0,     // level based compaction style
+  kCompactionStyleUniversal = 0x1  // Universal compaction style
 };
 
 // Compression options for different compression algorithms like Zlib
 struct CompressionOptions {
   int window_bits;
   int level;
   int strategy;
-  CompressionOptions():window_bits(-14),
-                      level(-1),
-                      strategy(0){}
-  CompressionOptions(int wbits, int lev, int strategy):window_bits(wbits),
-                                                       level(lev),
-                                                       strategy(strategy){}
+  CompressionOptions() : window_bits(-14), level(-1), strategy(0) {}
+  CompressionOptions(int wbits, int lev, int strategy)
+      : window_bits(wbits), level(lev), strategy(strategy) {}
 };
 
 // Options to control the behavior of a database (passed to DB::Open)
@@ -216,7 +213,6 @@ struct Options {
   // Default: 16
   int block_restart_interval;
 
-
   // Compress blocks using the specified compression algorithm. This
   // parameter can be changed dynamically.
   //
@@ -247,7 +243,7 @@ struct Options {
   // java/C api hard to construct.
   std::vector<CompressionType> compression_per_level;
 
-  //different options for compression algorithms
+  // different options for compression algorithms
   CompressionOptions compression_opts;
 
   // If non-nullptr, use the specified filter policy to reduce disk reads.
@@ -326,7 +322,6 @@ struct Options {
   // will be 20MB, total file size for level-2 will be 200MB,
   // and total file size for level-3 will be 2GB.
 
-
   // by default 'max_bytes_for_level_base' is 10MB.
   uint64_t max_bytes_for_level_base;
   // by default 'max_bytes_for_level_base' is 10.
@@ -484,10 +479,19 @@ struct Options {
   // order.
   int table_cache_remove_scan_count_limit;
 
-  // size of one block in arena memory allocation.
-  // If <= 0, a proper value is automatically calculated (usually 1/10 of
+  // Size of one block in arena memory allocation.
+  //
+  // If <= 0, a proper value is automatically calculated (usually about 1/10 of
   // writer_buffer_size).
   //
+  // There are two additonal restriction of the The specified size:
+  // (1) size should be in the range of [4096, 2 << 30] and
+  // (2) be the multiple of the CPU word (which helps with the memory
+  // alignment).
+  //
+  // We'll automatically check and adjust the size number to make sure it
+  // conforms to the restrictions.
+  //
   // Default: 0
   size_t arena_block_size;
 
@@ -572,7 +576,12 @@ struct Options {
   // Specify the file access pattern once a compaction is started.
   // It will be applied to all input files of a compaction.
   // Default: NORMAL
-  enum { NONE, NORMAL, SEQUENTIAL, WILLNEED } access_hint_on_compaction_start;
+  enum {
+    NONE,
+    NORMAL,
+    SEQUENTIAL,
+    WILLNEED
+  } access_hint_on_compaction_start;
 
   // Use adaptive mutex, which spins in the user space before resorting
   // to kernel. This could reduce context switch when the mutex is not
@@ -622,7 +631,7 @@ struct Options {
   // Default: emtpy vector -- no user-defined statistics collection will be
   // performed.
   std::vector<std::shared_ptr<TablePropertiesCollector>>
-    table_properties_collectors;
+      table_properties_collectors;
 
   // Allows thread-safe inplace updates. Requires Updates iff
   // * key exists in current memtable
@@ -644,7 +653,7 @@ struct Options {
   // the block cache. It will not page in data from the OS cache or data that
   // resides in storage.
   enum ReadTier {
-    kReadAllTier = 0x0, // data in memtable, block cache, OS cache or storage
+    kReadAllTier = 0x0,    // data in memtable, block cache, OS cache or storage
     kBlockCacheTier = 0x1  // data in memtable or block cache
   };
 
@@ -697,13 +706,14 @@ struct ReadOptions {
         prefix_seek(false),
         snapshot(nullptr),
         prefix(nullptr),
-        read_tier(kReadAllTier) {
-  }
-  ReadOptions(bool cksum, bool cache) :
-      verify_checksums(cksum), fill_cache(cache),
-      prefix_seek(false), snapshot(nullptr), prefix(nullptr),
-      read_tier(kReadAllTier) {
-  }
+        read_tier(kReadAllTier) {}
+  ReadOptions(bool cksum, bool cache)
+      : verify_checksums(cksum),
+        fill_cache(cache),
+        prefix_seek(false),
+        snapshot(nullptr),
+        prefix(nullptr),
+        read_tier(kReadAllTier) {}
 };
 
 // Options that control write operations
@@ -730,10 +740,7 @@ struct WriteOptions {
   // and the write may got lost after a crash.
   bool disableWAL;
 
-  WriteOptions()
-      : sync(false),
-        disableWAL(false) {
-  }
+  WriteOptions() : sync(false), disableWAL(false) {}
 };
 
 // Options that control flush operations
@@ -742,9 +749,7 @@ struct FlushOptions {
   // Default: true
   bool wait;
 
-  FlushOptions()
-      : wait(true) {
-  }
+  FlushOptions() : wait(true) {}
 };
 
 }  // namespace rocksdb
```
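The adjustment rule promised in the new `arena_block_size` comment is implemented by `OptimizeBlockSize` in `util/arena_impl.cc` (next diff). As a quick illustration of what the rule does to a requested size, here is a minimal standalone sketch; it is our own restatement, not the committed code (`AdjustArenaBlockSize` is a hypothetical name, and the max constant is written as an unsigned shift to avoid a signed overflow):

```cpp
#include <algorithm>
#include <cstddef>

// Sketch of the arena_block_size adjustment rule described above:
// clamp into [4096, 2 << 30], then round up to a multiple of the CPU word.
size_t AdjustArenaBlockSize(size_t block_size) {
  const size_t kMin = 4096;                 // kMinBlockSize
  const size_t kMax = size_t{2} << 30;      // kMaxBlockSize, written unsigned
  const size_t kAlignUnit = sizeof(void*);  // CPU word size
  block_size = std::max(kMin, std::min(kMax, block_size));
  if (block_size % kAlignUnit != 0) {
    block_size = (1 + block_size / kAlignUnit) * kAlignUnit;
  }
  return block_size;
}

// On a 64-bit platform (sizeof(void*) == 8):
//   AdjustArenaBlockSize(100)  == 4096   (clamped to the minimum)
//   AdjustArenaBlockSize(5001) == 5008   (rounded up to a multiple of 8)
```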

util/arena_impl.cc

+48 −33

```diff
@@ -8,71 +8,86 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 
 #include "util/arena_impl.h"
+#include <algorithm>
 
 namespace rocksdb {
 
-ArenaImpl::ArenaImpl(size_t block_size) {
-  if (block_size < kMinBlockSize) {
-    block_size_ = kMinBlockSize;
-  } else if (block_size > kMaxBlockSize) {
-    block_size_ = kMaxBlockSize;
-  } else {
-    block_size_ = block_size;
+const size_t ArenaImpl::kMinBlockSize = 4096;
+const size_t ArenaImpl::kMaxBlockSize = 2 << 30;
+static const int kAlignUnit = sizeof(void*);
+
+size_t OptimizeBlockSize(size_t block_size) {
+  // Make sure block_size is in optimal range
+  block_size = std::max(ArenaImpl::kMinBlockSize, block_size);
+  block_size = std::min(ArenaImpl::kMaxBlockSize, block_size);
+
+  // make sure block_size is the multiple of kAlignUnit
+  if (block_size % kAlignUnit != 0) {
+    block_size = (1 + block_size / kAlignUnit) * kAlignUnit;
   }
 
-  blocks_memory_ = 0;
-  alloc_ptr_ = nullptr;  // First allocation will allocate a block
-  alloc_bytes_remaining_ = 0;
+  return block_size;
+}
+
+ArenaImpl::ArenaImpl(size_t block_size)
+    : kBlockSize(OptimizeBlockSize(block_size)) {
+  assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize &&
+         kBlockSize % kAlignUnit == 0);
 }
 
 ArenaImpl::~ArenaImpl() {
-  for (size_t i = 0; i < blocks_.size(); i++) {
-    delete[] blocks_[i];
+  for (const auto& block : blocks_) {
+    delete[] block;
   }
 }
 
-char* ArenaImpl::AllocateFallback(size_t bytes) {
-  if (bytes > block_size_ / 4) {
+char* ArenaImpl::AllocateFallback(size_t bytes, bool aligned) {
+  if (bytes > kBlockSize / 4) {
     // Object is more than a quarter of our block size. Allocate it separately
     // to avoid wasting too much space in leftover bytes.
-    char* result = AllocateNewBlock(bytes);
-    return result;
+    return AllocateNewBlock(bytes);
   }
 
   // We waste the remaining space in the current block.
-  alloc_ptr_ = AllocateNewBlock(block_size_);
-  alloc_bytes_remaining_ = block_size_;
+  auto block_head = AllocateNewBlock(kBlockSize);
+  alloc_bytes_remaining_ = kBlockSize - bytes;
 
-  char* result = alloc_ptr_;
-  alloc_ptr_ += bytes;
-  alloc_bytes_remaining_ -= bytes;
-  return result;
+  if (aligned) {
+    aligned_alloc_ptr_ = block_head + bytes;
+    unaligned_alloc_ptr_ = block_head + kBlockSize;
+    return block_head;
+  } else {
+    aligned_alloc_ptr_ = block_head;
+    unaligned_alloc_ptr_ = block_head + kBlockSize - bytes;
+    return unaligned_alloc_ptr_;
+  }
 }
 
 char* ArenaImpl::AllocateAligned(size_t bytes) {
-  const int align = sizeof(void*);    // We'll align to pointer size
-  assert((align & (align-1)) == 0);   // Pointer size should be a power of 2
-  size_t current_mod = reinterpret_cast<uintptr_t>(alloc_ptr_) & (align-1);
-  size_t slop = (current_mod == 0 ? 0 : align - current_mod);
+  assert((kAlignUnit & (kAlignUnit - 1)) ==
+         0);  // Pointer size should be a power of 2
+  size_t current_mod =
+      reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
+  size_t slop = (current_mod == 0 ? 0 : kAlignUnit - current_mod);
   size_t needed = bytes + slop;
   char* result;
   if (needed <= alloc_bytes_remaining_) {
-    result = alloc_ptr_ + slop;
-    alloc_ptr_ += needed;
+    result = aligned_alloc_ptr_ + slop;
+    aligned_alloc_ptr_ += needed;
     alloc_bytes_remaining_ -= needed;
   } else {
     // AllocateFallback always returned aligned memory
-    result = AllocateFallback(bytes);
+    result = AllocateFallback(bytes, true /* aligned */);
   }
-  assert((reinterpret_cast<uintptr_t>(result) & (align-1)) == 0);
+  assert((reinterpret_cast<uintptr_t>(result) & (kAlignUnit - 1)) == 0);
   return result;
 }
 
 char* ArenaImpl::AllocateNewBlock(size_t block_bytes) {
-  char* result = new char[block_bytes];
+  char* block = new char[block_bytes];
   blocks_memory_ += block_bytes;
-  blocks_.push_back(result);
-  return result;
+  blocks_.push_back(block);
+  return block;
 }
 
 }  // namespace rocksdb
```
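The heart of the commit is the new `AllocateFallback`: each block is now consumed from both ends, with `aligned_alloc_ptr_` moving forward from the front (which `new[]` returns suitably aligned) and `unaligned_alloc_ptr_` moving backward from the tail. With a single shared cursor, every small unaligned request could misalign the cursor and force up to `sizeof(void*) - 1` bytes of slop on the next aligned request; with two ends, unaligned requests never disturb the aligned cursor. A self-contained sketch of the same two-ended scheme, using illustrative names rather than the actual `ArenaImpl` members:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>

// Illustrative sketch of the two-ended block scheme used by ArenaImpl:
// aligned chunks grow forward from the front, unaligned chunks grow
// backward from the tail, and both share one remaining-bytes budget.
class TwoEndedBlock {
 public:
  explicit TwoEndedBlock(size_t size)
      : head_(new char[size]), front_(head_), back_(head_ + size) {}
  ~TwoEndedBlock() { delete[] head_; }

  // Pointer-size-aligned memory, carved from the front end.
  char* AllocateAligned(size_t bytes) {
    const size_t align = sizeof(void*);
    size_t mod = reinterpret_cast<uintptr_t>(front_) & (align - 1);
    size_t slop = (mod == 0) ? 0 : align - mod;
    if (bytes + slop > Remaining()) return nullptr;  // arena would start a new block
    char* result = front_ + slop;
    front_ += bytes + slop;
    return result;
  }

  // Unaligned memory, carved from the tail end; no slop is needed because
  // these requests pack tightly against each other.
  char* AllocateUnaligned(size_t bytes) {
    if (bytes > Remaining()) return nullptr;  // arena would start a new block
    back_ -= bytes;
    return back_;
  }

  size_t Remaining() const { return static_cast<size_t>(back_ - front_); }

 private:
  char* head_;   // owning pointer to the block
  char* front_;  // plays the role of aligned_alloc_ptr_
  char* back_;   // plays the role of unaligned_alloc_ptr_
};
```

In the real class, the `nullptr` cases above are where `AllocateFallback` either starts a fresh block or gives an oversized request (more than a quarter of the block size) its own dedicated block.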

util/arena_impl.h

+38 −29

```diff
@@ -22,49 +22,54 @@ namespace rocksdb {
 
 class ArenaImpl : public Arena {
  public:
+  // No copying allowed
+  ArenaImpl(const ArenaImpl&) = delete;
+  void operator=(const ArenaImpl&) = delete;
+
+  static const size_t kMinBlockSize;
+  static const size_t kMaxBlockSize;
+
   explicit ArenaImpl(size_t block_size = kMinBlockSize);
   virtual ~ArenaImpl();
 
-  virtual char* Allocate(size_t bytes);
+  virtual char* Allocate(size_t bytes) override;
 
-  virtual char* AllocateAligned(size_t bytes);
+  virtual char* AllocateAligned(size_t bytes) override;
 
   // Returns an estimate of the total memory usage of data allocated
-  // by the arena (including space allocated but not yet used for user
+  // by the arena (exclude the space allocated but not yet used for future
   // allocations).
-  //
-  // TODO: Do we need to exclude space allocated but not used?
   virtual const size_t ApproximateMemoryUsage() {
-    return blocks_memory_ + blocks_.capacity() * sizeof(char*);
+    return blocks_memory_ + blocks_.capacity() * sizeof(char*) -
+           alloc_bytes_remaining_;
   }
 
-  virtual const size_t MemoryAllocatedBytes() {
+  virtual const size_t MemoryAllocatedBytes() override {
     return blocks_memory_;
   }
 
  private:
-  char* AllocateFallback(size_t bytes);
-  char* AllocateNewBlock(size_t block_bytes);
-
-  static const size_t kMinBlockSize = 4096;
-  static const size_t kMaxBlockSize = 2 << 30;
-
   // Number of bytes allocated in one block
-  size_t block_size_;
-
-  // Allocation state
-  char* alloc_ptr_;
-  size_t alloc_bytes_remaining_;
-
+  const size_t kBlockSize;
   // Array of new[] allocated memory blocks
-  std::vector<char*> blocks_;
+  typedef std::vector<char*> Blocks;
+  Blocks blocks_;
+
+  // Stats for current active block.
+  // For each block, we allocate aligned memory chucks from one end and
+  // allocate unaligned memory chucks from the other end. Otherwise the
+  // memory waste for alignment will be higher if we allocate both types of
+  // memory from one direction.
+  char* unaligned_alloc_ptr_ = nullptr;
+  char* aligned_alloc_ptr_ = nullptr;
+  // How many bytes left in currently active block?
+  size_t alloc_bytes_remaining_ = 0;
+
+  char* AllocateFallback(size_t bytes, bool aligned);
+  char* AllocateNewBlock(size_t block_bytes);
 
   // Bytes of memory in blocks allocated so far
-  size_t blocks_memory_;
-
-  // No copying allowed
-  ArenaImpl(const ArenaImpl&);
-  void operator=(const ArenaImpl&);
+  size_t blocks_memory_ = 0;
 };
 
 inline char* ArenaImpl::Allocate(size_t bytes) {
@@ -73,12 +78,16 @@ inline char* ArenaImpl::Allocate(size_t bytes) {
   // them for our internal use).
   assert(bytes > 0);
   if (bytes <= alloc_bytes_remaining_) {
-    char* result = alloc_ptr_;
-    alloc_ptr_ += bytes;
+    unaligned_alloc_ptr_ -= bytes;
     alloc_bytes_remaining_ -= bytes;
-    return result;
+    return unaligned_alloc_ptr_;
  }
-  return AllocateFallback(bytes);
+  return AllocateFallback(bytes, false /* unaligned */);
 }
 
+// check and adjust the block_size so that the return value is
+// 1. in the range of [kMinBlockSize, kMaxBlockSize].
+// 2. the multiple of align unit.
+extern size_t OptimizeBlockSize(size_t block_size);
+
 }  // namespace rocksdb
```
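Note that `ApproximateMemoryUsage()` now subtracts `alloc_bytes_remaining_`, so the unused tail of the active block no longer counts toward the estimate. To make the two allocation paths concrete, here is a hypothetical driver; it is not part of the commit (per the test plan above, verification was done by running the existing unit tests), and it assumes it is built inside the RocksDB tree where `util/arena_impl.h` is visible:

```cpp
#include <cassert>
#include <cstdint>

#include "util/arena_impl.h"

// Hypothetical driver, not part of this commit: exercises both paths.
int main() {
  rocksdb::ArenaImpl arena(5001);  // OptimizeBlockSize rounds this up to 5008

  char* aligned = arena.AllocateAligned(40);  // carved from the block's front
  char* unaligned = arena.Allocate(13);       // carved from the block's back
  assert(reinterpret_cast<uintptr_t>(aligned) % sizeof(void*) == 0);
  (void)unaligned;

  // Both chunks came out of one new[] block, tracked in blocks_ and
  // released together by ~ArenaImpl().
  return 0;
}
```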
