Skip to content

Commit 4c2b1f0

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents a5d2863 + 40ddc3d commit 4c2b1f0

File tree

97 files changed

+1781
-828
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+1781
-828
lines changed

.travis.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ before_install:
1414
- sudo dpkg -i libgflags-dev_2.0-1_amd64.deb
1515
# Lousy hack to disable use and testing of fallocate, which doesn't behave quite
1616
# as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment.
17-
- sed -i "s/fallocate(/HACK_NO_fallocate(/" build_tools/build_detect_platform
18-
script: make check -j8
17+
script: OPT=-DTRAVIS make check -j8
1918
notifications:
2019
email: false

HISTORY.md

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Rocksdb Change Log
22

3-
### Unreleased
3+
## Unreleased
4+
5+
----- Past Releases -----
6+
7+
## 3.5.0 (9/3/2014)
48
### New Features
59
* Add include/utilities/write_batch_with_index.h, providing a utility class to query data out of WriteBatch when building it.
610
* Move BlockBasedTable related options to BlockBasedTableOptions from Options. Change corresponding JNI interface. Options affected include:
@@ -11,15 +15,12 @@
1115
### Public API changes
1216
* The Prefix Extractor used with V2 compaction filters is now passed user key to SliceTransform::Transform instead of unparsed RocksDB key.
1317

14-
15-
----- Past Releases -----
16-
17-
1818
## 3.4.0 (8/18/2014)
1919
### New Features
2020
* Support Multiple DB paths in universal style compactions
2121
* Add feature of storing plain table index and bloom filter in SST file.
2222
* CompactRange() will never output compacted files to level 0. This used to be the case when all the compaction input files were at level 0.
23+
* Added iterate_upper_bound to define the extent up to which the forward iterator will return entries. This will prevent iterating over delete markers and overwritten entries for edge cases where you want to break out of the iterator anyway. This may improve performance in case there are a large number of delete markers or overwritten entries.
2324

2425
### Public API changes
2526
* DBOptions.db_paths is now a vector of DBPath structures, each of which specifies both a path and a target size

Makefile

+4-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ TOOLS = \
132132
options_test \
133133
blob_store_bench
134134

135-
PROGRAMS = db_bench signal_test table_reader_bench log_and_apply_bench $(TOOLS)
135+
PROGRAMS = db_bench signal_test table_reader_bench log_and_apply_bench cache_bench $(TOOLS)
136136

137137
# The library name is configurable since we are maintaining libraries of both
138138
# debug/release mode.
@@ -264,6 +264,9 @@ $(LIBRARY): $(LIBOBJECTS)
264264
db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
265265
$(CXX) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
266266

267+
cache_bench: util/cache_bench.o $(LIBOBJECTS) $(TESTUTIL)
268+
$(CXX) util/cache_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
269+
267270
block_hash_index_test: table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS)
268271
$(CXX) table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
269272

build_tools/regression_build_test.sh

+34
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,38 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
344344
--threads=32 \
345345
--writes_per_second=81920 > ${STAT_FILE}.seekwhilewriting_in_ram
346346

347+
# measure fillseq performance with a large number of column families
348+
./db_bench \
349+
--benchmarks=fillseq \
350+
--num_column_families=500 \
351+
--write_buffer_size=1048576 \
352+
--db=$DATA_DIR \
353+
--use_existing_db=0 \
354+
--num=$NUM \
355+
--writes=$NUM \
356+
--open_files=55000 \
357+
--statistics=1 \
358+
--histogram=1 \
359+
--disable_data_sync=1 \
360+
--disable_wal=1 \
361+
--sync=0 > ${STAT_FILE}.fillseq_lots_column_families
362+
363+
# measure overwrite performance with a large number of column families
364+
./db_bench \
365+
--benchmarks=overwrite \
366+
--num_column_families=500 \
367+
--write_buffer_size=1048576 \
368+
--db=$DATA_DIR \
369+
--use_existing_db=1 \
370+
--num=$NUM \
371+
--writes=$((NUM / 10)) \
372+
--open_files=55000 \
373+
--statistics=1 \
374+
--histogram=1 \
375+
--disable_data_sync=1 \
376+
--disable_wal=1 \
377+
--sync=0 \
378+
--threads=8 > ${STAT_FILE}.overwrite_lots_column_families
347379

348380
# send data to ods
349381
function send_to_ods {
@@ -392,3 +424,5 @@ send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadr
392424
send_benchmark_to_ods readwhilewriting readwhilewriting $STAT_FILE.readwhilewriting
393425
send_benchmark_to_ods readwhilewriting readwhilewriting_in_ram ${STAT_FILE}.readwhilewriting_in_ram
394426
send_benchmark_to_ods seekrandomwhilewriting seekwhilewriting_in_ram ${STAT_FILE}.seekwhilewriting_in_ram
427+
send_benchmark_to_ods fillseq fillseq_lots_column_families ${STAT_FILE}.fillseq_lots_column_families
428+
send_benchmark_to_ods overwrite overwrite_lots_column_families ${STAT_FILE}.overwrite_lots_column_families

db/builder.cc

+22-18
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,24 @@ namespace rocksdb {
2626

2727
class TableFactory;
2828

29-
TableBuilder* NewTableBuilder(const Options& options,
29+
TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions,
3030
const InternalKeyComparator& internal_comparator,
3131
WritableFile* file,
32-
CompressionType compression_type) {
33-
return options.table_factory->NewTableBuilder(options, internal_comparator,
34-
file, compression_type);
32+
const CompressionType compression_type,
33+
const CompressionOptions& compression_opts) {
34+
return ioptions.table_factory->NewTableBuilder(
35+
ioptions, internal_comparator, file, compression_type, compression_opts);
3536
}
3637

37-
Status BuildTable(const std::string& dbname, Env* env, const Options& options,
38-
const EnvOptions& soptions, TableCache* table_cache,
38+
Status BuildTable(const std::string& dbname, Env* env,
39+
const ImmutableCFOptions& ioptions,
40+
const EnvOptions& env_options, TableCache* table_cache,
3941
Iterator* iter, FileMetaData* meta,
4042
const InternalKeyComparator& internal_comparator,
4143
const SequenceNumber newest_snapshot,
4244
const SequenceNumber earliest_seqno_in_memtable,
4345
const CompressionType compression,
46+
const CompressionOptions& compression_opts,
4447
const Env::IOPriority io_priority) {
4548
Status s;
4649
meta->fd.file_size = 0;
@@ -50,23 +53,24 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
5053
// If the sequence number of the smallest entry in the memtable is
5154
// smaller than the most recent snapshot, then we do not trigger
5255
// removal of duplicate/deleted keys as part of this builder.
53-
bool purge = options.purge_redundant_kvs_while_flush;
56+
bool purge = ioptions.purge_redundant_kvs_while_flush;
5457
if (earliest_seqno_in_memtable <= newest_snapshot) {
5558
purge = false;
5659
}
5760

58-
std::string fname = TableFileName(options.db_paths, meta->fd.GetNumber(),
61+
std::string fname = TableFileName(ioptions.db_paths, meta->fd.GetNumber(),
5962
meta->fd.GetPathId());
6063
if (iter->Valid()) {
6164
unique_ptr<WritableFile> file;
62-
s = env->NewWritableFile(fname, &file, soptions);
65+
s = env->NewWritableFile(fname, &file, env_options);
6366
if (!s.ok()) {
6467
return s;
6568
}
6669
file->SetIOPriority(io_priority);
6770

68-
TableBuilder* builder =
69-
NewTableBuilder(options, internal_comparator, file.get(), compression);
71+
TableBuilder* builder = NewTableBuilder(
72+
ioptions, internal_comparator, file.get(),
73+
compression, compression_opts);
7074

7175
// the first key is the smallest key
7276
Slice key = iter->key();
@@ -75,8 +79,8 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
7579
meta->largest_seqno = meta->smallest_seqno;
7680

7781
MergeHelper merge(internal_comparator.user_comparator(),
78-
options.merge_operator.get(), options.info_log.get(),
79-
options.min_partial_merge_operands,
82+
ioptions.merge_operator, ioptions.info_log,
83+
ioptions.min_partial_merge_operands,
8084
true /* internal key corruption is not ok */);
8185

8286
if (purge) {
@@ -196,12 +200,12 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
196200
delete builder;
197201

198202
// Finish and check for file errors
199-
if (s.ok() && !options.disableDataSync) {
200-
if (options.use_fsync) {
201-
StopWatch sw(env, options.statistics.get(), TABLE_SYNC_MICROS);
203+
if (s.ok() && !ioptions.disable_data_sync) {
204+
if (ioptions.use_fsync) {
205+
StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
202206
s = file->Fsync();
203207
} else {
204-
StopWatch sw(env, options.statistics.get(), TABLE_SYNC_MICROS);
208+
StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
205209
s = file->Sync();
206210
}
207211
}
@@ -211,7 +215,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
211215

212216
if (s.ok()) {
213217
// Verify that the table is usable
214-
Iterator* it = table_cache->NewIterator(ReadOptions(), soptions,
218+
Iterator* it = table_cache->NewIterator(ReadOptions(), env_options,
215219
internal_comparator, meta->fd);
216220
s = it->status();
217221
delete it;

db/builder.h

+8-3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "rocksdb/status.h"
1212
#include "rocksdb/types.h"
1313
#include "rocksdb/options.h"
14+
#include "rocksdb/immutable_options.h"
1415

1516
namespace rocksdb {
1617

@@ -26,22 +27,26 @@ class TableBuilder;
2627
class WritableFile;
2728

2829
extern TableBuilder* NewTableBuilder(
29-
const Options& options, const InternalKeyComparator& internal_comparator,
30-
WritableFile* file, CompressionType compression_type);
30+
const ImmutableCFOptions& options,
31+
const InternalKeyComparator& internal_comparator,
32+
WritableFile* file, const CompressionType compression_type,
33+
const CompressionOptions& compression_opts);
3134

3235
// Build a Table file from the contents of *iter. The generated file
3336
// will be named according to number specified in meta. On success, the rest of
3437
// *meta will be filled with metadata about the generated table.
3538
// If no data is present in *iter, meta->file_size will be set to
3639
// zero, and no Table file will be produced.
3740
extern Status BuildTable(const std::string& dbname, Env* env,
38-
const Options& options, const EnvOptions& soptions,
41+
const ImmutableCFOptions& options,
42+
const EnvOptions& env_options,
3943
TableCache* table_cache, Iterator* iter,
4044
FileMetaData* meta,
4145
const InternalKeyComparator& internal_comparator,
4246
const SequenceNumber newest_snapshot,
4347
const SequenceNumber earliest_seqno_in_memtable,
4448
const CompressionType compression,
49+
const CompressionOptions& compression_opts,
4550
const Env::IOPriority io_priority = Env::IO_HIGH);
4651

4752
} // namespace rocksdb

db/c.cc

+7
Original file line numberDiff line numberDiff line change
@@ -1844,6 +1844,13 @@ void rocksdb_readoptions_set_snapshot(
18441844
opt->rep.snapshot = (snap ? snap->rep : nullptr);
18451845
}
18461846

1847+
// C API setter for the read options' iterate_upper_bound.
//
// Wraps the caller's (key, keylen) bytes in a Slice and stores a pointer to
// that Slice in opt->rep.iterate_upper_bound.
//
// NOTE(review): lifetime bug. `prefix` is a local variable of this function,
// so the address stored in opt->rep.iterate_upper_bound refers to an object
// that is destroyed when the function returns. Any later dereference of
// iterate_upper_bound through `opt` is undefined behavior. The Slice needs
// storage that lives at least as long as `opt` (for example a Slice member
// owned by rocksdb_readoptions_t that this setter assigns and points at).
// NOTE(review): the Slice only references `key`; it does not copy the bytes,
// so the caller's buffer presumably must also outlive every use of `opt` --
// confirm and document in the C header.
void rocksdb_readoptions_set_iterate_upper_bound(
1848+
rocksdb_readoptions_t* opt,
1849+
const char* key, size_t keylen) {
1850+
Slice prefix = Slice(key, keylen);
1851+
opt->rep.iterate_upper_bound = &prefix;
1852+
}
1853+
18471854
void rocksdb_readoptions_set_read_tier(
18481855
rocksdb_readoptions_t* opt, int v) {
18491856
opt->rep.read_tier = static_cast<rocksdb::ReadTier>(v);

db/column_family.cc

+8-7
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ ColumnFamilyData::ColumnFamilyData(uint32_t id, const std::string& name,
178178
Version* dummy_versions, Cache* table_cache,
179179
const ColumnFamilyOptions& options,
180180
const DBOptions* db_options,
181-
const EnvOptions& storage_options,
181+
const EnvOptions& env_options,
182182
ColumnFamilySet* column_family_set)
183183
: id_(id),
184184
name_(name),
@@ -188,6 +188,7 @@ ColumnFamilyData::ColumnFamilyData(uint32_t id, const std::string& name,
188188
dropped_(false),
189189
internal_comparator_(options.comparator),
190190
options_(*db_options, SanitizeOptions(&internal_comparator_, options)),
191+
ioptions_(options_),
191192
mem_(nullptr),
192193
imm_(options_.min_write_buffer_number_to_merge),
193194
super_version_(nullptr),
@@ -204,7 +205,7 @@ ColumnFamilyData::ColumnFamilyData(uint32_t id, const std::string& name,
204205
if (dummy_versions != nullptr) {
205206
internal_stats_.reset(
206207
new InternalStats(options_.num_levels, db_options->env, this));
207-
table_cache_.reset(new TableCache(&options_, storage_options, table_cache));
208+
table_cache_.reset(new TableCache(ioptions_, env_options, table_cache));
208209
if (options_.compaction_style == kCompactionStyleUniversal) {
209210
compaction_picker_.reset(
210211
new UniversalCompactionPicker(&options_, &internal_comparator_));
@@ -306,7 +307,7 @@ void ColumnFamilyData::RecalculateWriteStallRateLimitsConditions() {
306307
}
307308

308309
const EnvOptions* ColumnFamilyData::soptions() const {
309-
return &(column_family_set_->storage_options_);
310+
return &(column_family_set_->env_options_);
310311
}
311312

312313
void ColumnFamilyData::SetCurrent(Version* current) {
@@ -462,16 +463,16 @@ void ColumnFamilyData::ResetThreadLocalSuperVersions() {
462463

463464
ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
464465
const DBOptions* db_options,
465-
const EnvOptions& storage_options,
466+
const EnvOptions& env_options,
466467
Cache* table_cache)
467468
: max_column_family_(0),
468469
dummy_cfd_(new ColumnFamilyData(0, "", nullptr, nullptr,
469470
ColumnFamilyOptions(), db_options,
470-
storage_options_, nullptr)),
471+
env_options_, nullptr)),
471472
default_cfd_cache_(nullptr),
472473
db_name_(dbname),
473474
db_options_(db_options),
474-
storage_options_(storage_options),
475+
env_options_(env_options),
475476
table_cache_(table_cache),
476477
spin_lock_(ATOMIC_FLAG_INIT) {
477478
// initialize linked list
@@ -537,7 +538,7 @@ ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
537538
assert(column_families_.find(name) == column_families_.end());
538539
ColumnFamilyData* new_cfd =
539540
new ColumnFamilyData(id, name, dummy_versions, table_cache_, options,
540-
db_options_, storage_options_, this);
541+
db_options_, env_options_, this);
541542
Lock();
542543
column_families_.insert({name, id});
543544
column_family_data_.insert({id, new_cfd});

db/column_family.h

+8-5
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,11 @@ class ColumnFamilyData {
165165
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
166166
uint64_t GetLogNumber() const { return log_number_; }
167167

168-
// thread-safe
168+
// TODO(ljin): make this API thread-safe once we allow updating options_
169169
const Options* options() const { return &options_; }
170+
// thread-safe
170171
const EnvOptions* soptions() const;
172+
const ImmutableCFOptions* ioptions() const { return &ioptions_; }
171173

172174
InternalStats* internal_stats() { return internal_stats_.get(); }
173175

@@ -251,7 +253,7 @@ class ColumnFamilyData {
251253
Version* dummy_versions, Cache* table_cache,
252254
const ColumnFamilyOptions& options,
253255
const DBOptions* db_options,
254-
const EnvOptions& storage_options,
256+
const EnvOptions& env_options,
255257
ColumnFamilySet* column_family_set);
256258

257259
// Recalculate some small conditions, which are changed only during
@@ -272,7 +274,8 @@ class ColumnFamilyData {
272274

273275
const InternalKeyComparator internal_comparator_;
274276

275-
Options const options_;
277+
const Options options_;
278+
const ImmutableCFOptions ioptions_;
276279

277280
std::unique_ptr<TableCache> table_cache_;
278281

@@ -367,7 +370,7 @@ class ColumnFamilySet {
367370
};
368371

369372
ColumnFamilySet(const std::string& dbname, const DBOptions* db_options,
370-
const EnvOptions& storage_options, Cache* table_cache);
373+
const EnvOptions& env_options, Cache* table_cache);
371374
~ColumnFamilySet();
372375

373376
ColumnFamilyData* GetDefault() const;
@@ -420,7 +423,7 @@ class ColumnFamilySet {
420423

421424
const std::string db_name_;
422425
const DBOptions* const db_options_;
423-
const EnvOptions storage_options_;
426+
const EnvOptions env_options_;
424427
Cache* table_cache_;
425428
std::atomic_flag spin_lock_;
426429
};

db/compaction.cc

+3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99

1010
#include "db/compaction.h"
1111

12+
#ifndef __STDC_FORMAT_MACROS
1213
#define __STDC_FORMAT_MACROS
14+
#endif
15+
1316
#include <inttypes.h>
1417
#include <vector>
1518

db/compaction_picker.cc

+3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99

1010
#include "db/compaction_picker.h"
1111

12+
#ifndef __STDC_FORMAT_MACROS
1213
#define __STDC_FORMAT_MACROS
14+
#endif
15+
1316
#include <inttypes.h>
1417
#include <limits>
1518
#include "db/filename.h"

0 commit comments

Comments
 (0)