Skip to content

Commit 551e942

Browse files
committed
Merge branch 'master' into performance
2 parents ef2211a + b1d2de4 commit 551e942

37 files changed

+523
-851
lines changed

INSTALL.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ libraries. You are on your own.
1919

2020
* **Linux**
2121
* Upgrade your gcc to at least version 4.7 to get C++11 support.
22-
* Install gflags. If you're on Ubuntu, here's a nice tutorial:
22+
* Install gflags. First, try: `sudo apt-get install libgflags-dev`.
23+
If this doesn't work and you're using Ubuntu, here's a nice tutorial:
2324
(http://askubuntu.com/questions/312173/installing-gflags-12-04)
2425
* Install snappy. This is usually as easy as:
2526
`sudo apt-get install libsnappy-dev`.
2627
* Install zlib. Try: `sudo apt-get install zlib1g-dev`.
2728
* Install bzip2: `sudo apt-get install libbz2-dev`.
28-
* Install gflags: `sudo apt-get install libgflags-dev`.
2929
* **OS X**:
3030
* Install latest C++ compiler that supports C++ 11:
3131
* Update XCode: run `xcode-select --install` (or install it from XCode App's settings).

README

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ The core of this code has been derived from open-source leveldb.
1616
The code under this directory implements a system for maintaining a
1717
persistent key/value store.
1818

19-
See doc/index.html for more explanation.
20-
See doc/impl.html for a brief overview of the implementation.
19+
See doc/index.html and github wiki (https://github.com/facebook/rocksdb/wiki)
20+
for more explanation.
2121

2222
The public interface is in include/*. Callers should not include or
2323
rely on the details of any other header files in this package. Those

build_tools/regression_build_test.sh

+22-2
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ OPT=-DNDEBUG make db_bench -j$(nproc)
6565
--sync=0 \
6666
--threads=8 > ${STAT_FILE}.overwrite
6767

68-
# fill up the db for readrandom benchmark
68+
# fill up the db for readrandom benchmark (1GB total size)
6969
./db_bench \
7070
--benchmarks=fillseq \
7171
--db=$DATA_DIR \
@@ -83,7 +83,7 @@ OPT=-DNDEBUG make db_bench -j$(nproc)
8383
--sync=0 \
8484
--threads=1 > /dev/null
8585

86-
# measure readrandom
86+
# measure readrandom with 6GB block cache
8787
./db_bench \
8888
--benchmarks=readrandom \
8989
--db=$DATA_DIR \
@@ -102,6 +102,25 @@ OPT=-DNDEBUG make db_bench -j$(nproc)
102102
--sync=0 \
103103
--threads=32 > ${STAT_FILE}.readrandom
104104

105+
# measure readrandom with 300MB block cache
106+
./db_bench \
107+
--benchmarks=readrandom \
108+
--db=$DATA_DIR \
109+
--use_existing_db=1 \
110+
--bloom_bits=10 \
111+
--num=$NUM \
112+
--reads=$NUM \
113+
--cache_size=314572800 \
114+
--cache_numshardbits=8 \
115+
--open_files=55000 \
116+
--disable_seek_compaction=1 \
117+
--statistics=1 \
118+
--histogram=1 \
119+
--disable_data_sync=1 \
120+
--disable_wal=1 \
121+
--sync=0 \
122+
--threads=32 > ${STAT_FILE}.readrandomsmallblockcache
123+
105124
# measure memtable performance -- none of the data gets flushed to disk
106125
./db_bench \
107126
--benchmarks=fillrandom,readrandom, \
@@ -154,5 +173,6 @@ function send_benchmark_to_ods {
154173
send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite
155174
send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq
156175
send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom
176+
send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache
157177
send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom
158178
send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom

db/db_bench.cc

+29-15
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,10 @@ DEFINE_int32(universal_max_merge_width, 0, "The max number of files to compact"
191191
DEFINE_int32(universal_max_size_amplification_percent, 0,
192192
"The max size amplification for universal style compaction");
193193

194+
DEFINE_int32(universal_compression_size_percent, -1,
195+
"The percentage of the database to compress for universal "
196+
"compaction. -1 means compress everything.");
197+
194198
DEFINE_int64(cache_size, -1, "Number of bytes to use as a cache of uncompressed"
195199
"data. Negative means use default settings.");
196200

@@ -325,6 +329,23 @@ DEFINE_string(compression_type, "snappy",
325329
static enum rocksdb::CompressionType FLAGS_compression_type_e =
326330
rocksdb::kSnappyCompression;
327331

332+
DEFINE_int32(compression_level, -1,
333+
"Compression level. For zlib this should be -1 for the "
334+
"default level, or between 0 and 9.");
335+
336+
// Flag validator for --compression_level.
//
// Accepts values in the inclusive range [-1, 9]; the flag's help text
// describes -1 as the default level for zlib and 0..9 as explicit levels.
//
// flagname: name of the flag being validated, used only in the error message.
// value:    the value supplied for the flag.
//
// Returns true when the value is in range. Otherwise prints a diagnostic to
// stderr and returns false.
static bool ValidateCompressionLevel(const char* flagname, int32_t value) {
  if (value < -1 || value > 9) {
    // Cast so the argument type matches the %d conversion on every platform.
    fprintf(stderr, "Invalid value for --%s: %d, must be between -1 and 9\n",
            flagname, static_cast<int>(value));
    return false;
  }
  return true;
}
344+
345+
static const bool FLAGS_compression_level_dummy =
346+
google::RegisterFlagValidator(&FLAGS_compression_level,
347+
&ValidateCompressionLevel);
348+
328349
DEFINE_int32(min_level_to_compress, -1, "If non-negative, compression starts"
329350
" from this level. Levels with number < min_level_to_compress are"
330351
" not compressed. Otherwise, apply compression_type to "
@@ -434,12 +455,11 @@ static bool ValidatePrefixSize(const char* flagname, int32_t value) {
434455
}
435456
return true;
436457
}
437-
DEFINE_int32(prefix_size, 0, "Control the prefix size for PrefixHashRep");
458+
DEFINE_int32(prefix_size, 0, "Control the prefix size for HashSkipList");
438459

439460
enum RepFactory {
440461
kSkipList,
441462
kPrefixHash,
442-
kUnsorted,
443463
kVectorRep
444464
};
445465
enum RepFactory StringToRepFactory(const char* ctype) {
@@ -449,8 +469,6 @@ enum RepFactory StringToRepFactory(const char* ctype) {
449469
return kSkipList;
450470
else if (!strcasecmp(ctype, "prefix_hash"))
451471
return kPrefixHash;
452-
else if (!strcasecmp(ctype, "unsorted"))
453-
return kUnsorted;
454472
else if (!strcasecmp(ctype, "vector"))
455473
return kVectorRep;
456474

@@ -807,9 +825,6 @@ class Benchmark {
807825
case kSkipList:
808826
fprintf(stdout, "Memtablerep: skip_list\n");
809827
break;
810-
case kUnsorted:
811-
fprintf(stdout, "Memtablerep: unsorted\n");
812-
break;
813828
case kVectorRep:
814829
fprintf(stdout, "Memtablerep: vector\n");
815830
break;
@@ -1334,14 +1349,8 @@ class Benchmark {
13341349
}
13351350
switch (FLAGS_rep_factory) {
13361351
case kPrefixHash:
1337-
options.memtable_factory.reset(
1338-
new PrefixHashRepFactory(NewFixedPrefixTransform(FLAGS_prefix_size))
1339-
);
1340-
break;
1341-
case kUnsorted:
1342-
options.memtable_factory.reset(
1343-
new UnsortedRepFactory
1344-
);
1352+
options.memtable_factory.reset(NewHashSkipListRepFactory(
1353+
NewFixedPrefixTransform(FLAGS_prefix_size)));
13451354
break;
13461355
case kSkipList:
13471356
// no need to do anything
@@ -1368,6 +1377,7 @@ class Benchmark {
13681377
options.level0_slowdown_writes_trigger =
13691378
FLAGS_level0_slowdown_writes_trigger;
13701379
options.compression = FLAGS_compression_type_e;
1380+
options.compression_opts.level = FLAGS_compression_level;
13711381
options.WAL_ttl_seconds = FLAGS_wal_ttl_seconds;
13721382
options.WAL_size_limit_MB = FLAGS_wal_size_limit_MB;
13731383
if (FLAGS_min_level_to_compress >= 0) {
@@ -1429,6 +1439,10 @@ class Benchmark {
14291439
options.compaction_options_universal.max_size_amplification_percent =
14301440
FLAGS_universal_max_size_amplification_percent;
14311441
}
1442+
if (FLAGS_universal_compression_size_percent != -1) {
1443+
options.compaction_options_universal.compression_size_percent =
1444+
FLAGS_universal_compression_size_percent;
1445+
}
14321446

14331447
Status s;
14341448
if(FLAGS_readonly) {

db/db_impl.cc

+54-14
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "util/auto_roll_logger.h"
5252
#include "util/build_version.h"
5353
#include "util/coding.h"
54+
#include "util/hash_skiplist_rep.h"
5455
#include "util/logging.h"
5556
#include "util/mutexlock.h"
5657
#include "util/perf_context_imp.h"
@@ -163,10 +164,10 @@ Options SanitizeOptions(const std::string& dbname,
163164
Log(result.info_log, "Compaction filter specified, ignore factory");
164165
}
165166
if (result.prefix_extractor) {
166-
// If a prefix extractor has been supplied and a PrefixHashRepFactory is
167+
// If a prefix extractor has been supplied and a HashSkipListRepFactory is
167168
// being used, make sure that the latter uses the former as its transform
168169
// function.
169-
auto factory = dynamic_cast<PrefixHashRepFactory*>(
170+
auto factory = dynamic_cast<HashSkipListRepFactory*>(
170171
result.memtable_factory.get());
171172
if (factory &&
172173
factory->GetTransform() != result.prefix_extractor) {
@@ -236,7 +237,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
236237
mutex_(options.use_adaptive_mutex),
237238
shutting_down_(nullptr),
238239
bg_cv_(&mutex_),
239-
mem_rep_factory_(options_.memtable_factory),
240+
mem_rep_factory_(options_.memtable_factory.get()),
240241
mem_(new MemTable(internal_comparator_, mem_rep_factory_,
241242
NumberLevels(), options_)),
242243
logfile_number_(0),
@@ -516,6 +517,19 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state,
516517
// files in sst_delete_files and log_delete_files.
517518
// It is not necessary to hold the mutex when invoking this method.
518519
void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
520+
521+
// free pending memtables
522+
for (auto m : state.memtables_to_free) {
523+
delete m;
524+
}
525+
526+
// check if there is anything to do
527+
if (!state.all_files.size() &&
528+
!state.sst_delete_files.size() &&
529+
!state.log_delete_files.size()) {
530+
return;
531+
}
532+
519533
// this checks if FindObsoleteFiles() was run before. If not, don't do
520534
// PurgeObsoleteFiles(). If FindObsoleteFiles() was run, we need to also
521535
// run PurgeObsoleteFiles(), even if disable_delete_obsolete_files_ is true
@@ -1170,7 +1184,7 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
11701184
// Replace immutable memtable with the generated Table
11711185
s = imm_.InstallMemtableFlushResults(
11721186
mems, versions_.get(), s, &mutex_, options_.info_log.get(),
1173-
file_number, pending_outputs_);
1187+
file_number, pending_outputs_, &deletion_state.memtables_to_free);
11741188

11751189
if (s.ok()) {
11761190
if (madeProgress) {
@@ -1656,7 +1670,7 @@ Status DBImpl::BackgroundFlush(bool* madeProgress,
16561670

16571671
void DBImpl::BackgroundCallFlush() {
16581672
bool madeProgress = false;
1659-
DeletionState deletion_state;
1673+
DeletionState deletion_state(options_.max_write_buffer_number);
16601674
assert(bg_flush_scheduled_);
16611675
MutexLock l(&mutex_);
16621676

@@ -1702,7 +1716,7 @@ void DBImpl::TEST_PurgeObsoleteteWAL() {
17021716

17031717
void DBImpl::BackgroundCallCompaction() {
17041718
bool madeProgress = false;
1705-
DeletionState deletion_state;
1719+
DeletionState deletion_state(options_.max_write_buffer_number);
17061720

17071721
MaybeDumpStats();
17081722

@@ -1732,6 +1746,7 @@ void DBImpl::BackgroundCallCompaction() {
17321746
// FindObsoleteFiles(). This is because deletion_state does not catch
17331747
// all created files if compaction failed.
17341748
FindObsoleteFiles(deletion_state, !s.ok());
1749+
17351750
// delete unnecessary files if any, this is done outside the mutex
17361751
if (deletion_state.HaveSomethingToDelete()) {
17371752
mutex_.Unlock();
@@ -2492,25 +2507,20 @@ struct IterState {
24922507

24932508
static void CleanupIteratorState(void* arg1, void* arg2) {
24942509
IterState* state = reinterpret_cast<IterState*>(arg1);
2495-
std::vector<MemTable*> to_delete;
2496-
to_delete.reserve(state->mem.size());
2510+
DBImpl::DeletionState deletion_state(state->db->GetOptions().
2511+
max_write_buffer_number);
24972512
state->mu->Lock();
24982513
for (unsigned int i = 0; i < state->mem.size(); i++) {
24992514
MemTable* m = state->mem[i]->Unref();
25002515
if (m != nullptr) {
2501-
to_delete.push_back(m);
2516+
deletion_state.memtables_to_free.push_back(m);
25022517
}
25032518
}
25042519
state->version->Unref();
2505-
// delete only the sst obsolete files
2506-
DBImpl::DeletionState deletion_state;
25072520
// fast path FindObsoleteFiles
25082521
state->db->FindObsoleteFiles(deletion_state, false, true);
25092522
state->mu->Unlock();
25102523
state->db->PurgeObsoleteFiles(deletion_state);
2511-
2512-
// delete obsolete memtables outside the db-mutex
2513-
for (MemTable* m : to_delete) delete m;
25142524
delete state;
25152525
}
25162526
} // namespace
@@ -2612,8 +2622,10 @@ Status DBImpl::GetImpl(const ReadOptions& options,
26122622
BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer);
26132623
if (mem->Get(lkey, value, &s, merge_context, options_)) {
26142624
// Done
2625+
RecordTick(options_.statistics.get(), MEMTABLE_HIT);
26152626
} else if (imm.Get(lkey, value, &s, merge_context, options_)) {
26162627
// Done
2628+
RecordTick(options_.statistics.get(), MEMTABLE_HIT);
26172629
} else {
26182630
StopWatchNano from_files_timer(env_, false);
26192631
StartPerfTimer(&from_files_timer);
@@ -2622,6 +2634,7 @@ Status DBImpl::GetImpl(const ReadOptions& options,
26222634
options_, value_found);
26232635
have_stat_update = true;
26242636
BumpPerfTime(&perf_context.get_from_output_files_time, &from_files_timer);
2637+
RecordTick(options_.statistics.get(), MEMTABLE_MISS);
26252638
}
26262639

26272640
StopWatchNano post_process_timer(env_, false);
@@ -3514,6 +3527,33 @@ void DBImpl::GetLiveFilesMetaData(std::vector<LiveFileMetaData> *metadata) {
35143527
return versions_->GetLiveFilesMetaData(metadata);
35153528
}
35163529

3530+
// Reads the contents of this database's identity file into `identity`.
//
// The file name is obtained from IdentityFileName(dbname_). The file is
// opened through env_, its size is queried, and that many bytes are read.
// If the resulting string ends in '\n', that single trailing newline is
// removed.
//
// identity: output; overwritten with the file contents on success and left
//           untouched if any step fails.
//
// Returns the first non-OK Status from opening the file, querying its size,
// or reading it; otherwise the Status of the read.
Status DBImpl::GetDbIdentity(std::string& identity) {
  std::string idfilename = IdentityFileName(dbname_);
  unique_ptr<SequentialFile> idfile;
  const EnvOptions soptions;
  Status s = env_->NewSequentialFile(idfilename, &idfile, soptions);
  if (!s.ok()) {
    return s;
  }

  uint64_t file_size = 0;
  s = env_->GetFileSize(idfilename, &file_size);
  if (!s.ok()) {
    return s;
  }

  // Heap-backed scratch space instead of a variable-length stack array:
  // VLAs are a compiler extension in C++, are invalid for a zero-length
  // file, and would place an on-disk-controlled size on the stack.
  std::string scratch(static_cast<size_t>(file_size), '\0');
  Slice id;
  s = idfile->Read(scratch.size(), &id, &scratch[0]);
  if (!s.ok()) {
    return s;
  }

  // NOTE(review): `id` presumably references `scratch`; it is copied into
  // `identity` before `scratch` goes out of scope.
  identity.assign(id.ToString());
  // If last character is '\n' remove it from identity
  if (!identity.empty() && identity.back() == '\n') {
    identity.pop_back();
  }
  return s;
}
3556+
35173557
// Default implementations of convenience methods that subclasses of DB
35183558
// can call if they wish
35193559
Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {

0 commit comments

Comments
 (0)