Skip to content

Commit a8029fd

Browse files
committed
Introduce MergeContext to Lazily Initialize merge operand list
Summary: In get operations, merge_operands is only used in few cases. Lazily initialize it can reduce average latency in some cases Test Plan: make all check Reviewers: haobo, kailiu, dhruba Reviewed By: haobo CC: igor, nkg-, leveldb Differential Revision: https://reviews.facebook.net/D14415 Conflicts: db/db_impl.cc db/memtable.cc
1 parent bc5dd19 commit a8029fd

10 files changed

+129
-53
lines changed

db/db_impl.cc

+11-10
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "db/log_writer.h"
2828
#include "db/memtable.h"
2929
#include "db/memtablelist.h"
30+
#include "db/merge_context.h"
3031
#include "db/merge_helper.h"
3132
#include "db/prefix_filter_iterator.h"
3233
#include "db/table_cache.h"
@@ -2608,20 +2609,20 @@ Status DBImpl::GetImpl(const ReadOptions& options,
26082609

26092610

26102611
// Prepare to store a list of merge operations if merge occurs.
2611-
std::deque<std::string> merge_operands;
2612+
MergeContext merge_context;
26122613

26132614
// First look in the memtable, then in the immutable memtable (if any).
26142615
// s is both in/out. When in, s could either be OK or MergeInProgress.
26152616
// merge_operands will contain the sequence of merges in the latter case.
26162617
LookupKey lkey(key, snapshot);
2617-
if (mem->Get(lkey, value, &s, &merge_operands, options_)) {
2618+
if (mem->Get(lkey, value, &s, merge_context, options_)) {
26182619
// Done
26192620
RecordTick(options_.statistics.get(), MEMTABLE_HIT);
2620-
} else if (imm.Get(lkey, value, &s, &merge_operands, options_)) {
2621+
} else if (imm.Get(lkey, value, &s, merge_context, options_)) {
26212622
// Done
26222623
RecordTick(options_.statistics.get(), MEMTABLE_HIT);
26232624
} else {
2624-
current->Get(options, lkey, value, &s, &merge_operands, &stats,
2625+
current->Get(options, lkey, value, &s, &merge_context, &stats,
26252626
options_, value_found);
26262627
have_stat_update = true;
26272628
RecordTick(options_.statistics.get(), MEMTABLE_MISS);
@@ -2676,8 +2677,8 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
26762677
bool have_stat_update = false;
26772678
Version::GetStats stats;
26782679

2679-
// Prepare to store a list of merge operations if merge occurs.
2680-
std::deque<std::string> merge_operands;
2680+
// Contain a list of merge operations if merge occurs.
2681+
MergeContext merge_context;
26812682

26822683
// Note: this always resizes the values array
26832684
int numKeys = keys.size();
@@ -2692,17 +2693,17 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
26922693
// s is both in/out. When in, s could either be OK or MergeInProgress.
26932694
// merge_operands will contain the sequence of merges in the latter case.
26942695
for (int i=0; i<numKeys; ++i) {
2695-
merge_operands.clear();
2696+
merge_context.Clear();
26962697
Status& s = statList[i];
26972698
std::string* value = &(*values)[i];
26982699

26992700
LookupKey lkey(keys[i], snapshot);
2700-
if (mem->Get(lkey, value, &s, &merge_operands, options_)) {
2701+
if (mem->Get(lkey, value, &s, merge_context, options_)) {
27012702
// Done
2702-
} else if (imm.Get(lkey, value, &s, &merge_operands, options_)) {
2703+
} else if (imm.Get(lkey, value, &s, merge_context, options_)) {
27032704
// Done
27042705
} else {
2705-
current->Get(options, lkey, value, &s, &merge_operands, &stats, options_);
2706+
current->Get(options, lkey, value, &s, &merge_context, &stats, options_);
27062707
have_stat_update = true;
27072708
}
27082709

db/db_impl_readonly.cc

+5-3
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323
#include "db/log_reader.h"
2424
#include "db/log_writer.h"
2525
#include "db/memtable.h"
26+
#include "db/merge_context.h"
2627
#include "db/table_cache.h"
2728
#include "db/version_set.h"
2829
#include "db/write_batch_internal.h"
2930
#include "rocksdb/db.h"
3031
#include "rocksdb/env.h"
3132
#include "rocksdb/status.h"
3233
#include "rocksdb/table.h"
34+
#include "rocksdb/merge_operator.h"
3335
#include "port/port.h"
3436
#include "table/block.h"
3537
#include "table/merger.h"
@@ -57,12 +59,12 @@ Status DBImplReadOnly::Get(const ReadOptions& options,
5759
MemTable* mem = GetMemTable();
5860
Version* current = versions_->current();
5961
SequenceNumber snapshot = versions_->LastSequence();
60-
std::deque<std::string> merge_operands;
62+
MergeContext merge_context;
6163
LookupKey lkey(key, snapshot);
62-
if (mem->Get(lkey, value, &s, &merge_operands, options_)) {
64+
if (mem->Get(lkey, value, &s, merge_context, options_)) {
6365
} else {
6466
Version::GetStats stats;
65-
current->Get(options, lkey, value, &s, &merge_operands, &stats, options_);
67+
current->Get(options, lkey, value, &s, &merge_context, &stats, options_);
6668
}
6769
return s;
6870
}

db/memtable.cc

+15-17
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <memory>
1313

1414
#include "db/dbformat.h"
15+
#include "db/merge_context.h"
1516
#include "rocksdb/comparator.h"
1617
#include "rocksdb/env.h"
1718
#include "rocksdb/iterator.h"
@@ -162,15 +163,12 @@ void MemTable::Add(SequenceNumber s, ValueType type,
162163
}
163164

164165
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
165-
std::deque<std::string>* operands, const Options& options) {
166+
MergeContext& merge_context, const Options& options) {
166167
Slice memkey = key.memtable_key();
167168
std::shared_ptr<MemTableRep::Iterator> iter(
168169
table_->GetIterator(key.user_key()));
169170
iter->Seek(memkey.data());
170171

171-
// It is the caller's responsibility to allocate/delete operands list
172-
assert(operands != nullptr);
173-
174172
bool merge_in_progress = s->IsMergeInProgress();
175173
auto merge_operator = options.merge_operator.get();
176174
auto logger = options.info_log;
@@ -202,8 +200,9 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
202200
*s = Status::OK();
203201
if (merge_in_progress) {
204202
assert(merge_operator);
205-
if (!merge_operator->FullMerge(key.user_key(), &v, *operands,
206-
value, logger.get())) {
203+
if (!merge_operator->FullMerge(key.user_key(), &v,
204+
merge_context.GetOperands(), value,
205+
logger.get())) {
207206
RecordTick(options.statistics.get(), NUMBER_MERGE_FAILURES);
208207
*s = Status::Corruption("Error: Could not perform merge.");
209208
}
@@ -219,8 +218,9 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
219218
if (merge_in_progress) {
220219
assert(merge_operator);
221220
*s = Status::OK();
222-
if (!merge_operator->FullMerge(key.user_key(), nullptr, *operands,
223-
value, logger.get())) {
221+
if (!merge_operator->FullMerge(key.user_key(), nullptr,
222+
merge_context.GetOperands(), value,
223+
logger.get())) {
224224
RecordTick(options.statistics.get(), NUMBER_MERGE_FAILURES);
225225
*s = Status::Corruption("Error: Could not perform merge.");
226226
}
@@ -232,16 +232,14 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
232232
case kTypeMerge: {
233233
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
234234
merge_in_progress = true;
235-
operands->push_front(v.ToString());
236-
while(operands->size() >= 2) {
235+
merge_context.PushOperand(v);
236+
while(merge_context.GetNumOperands() >= 2) {
237237
// Attempt to associative merge. (Returns true if successful)
238-
if (merge_operator->PartialMerge(key.user_key(),
239-
Slice((*operands)[0]),
240-
Slice((*operands)[1]),
241-
&merge_result,
242-
logger.get())) {
243-
operands->pop_front();
244-
swap(operands->front(), merge_result);
238+
if (merge_operator->PartialMerge(key.user_key(),
239+
merge_context.GetOperand(0),
240+
merge_context.GetOperand(1),
241+
&merge_result, logger.get())) {
242+
merge_context.PushPartialMergeResult(merge_result);
245243
} else {
246244
// Stack them because user can't associative merge
247245
break;

db/memtable.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ namespace rocksdb {
2222

2323
class Mutex;
2424
class MemTableIterator;
25+
class MergeContext;
2526

2627
class MemTable {
2728
public:
@@ -94,7 +95,7 @@ class MemTable {
9495
// store MergeInProgress in s, and return false.
9596
// Else, return false.
9697
bool Get(const LookupKey& key, std::string* value, Status* s,
97-
std::deque<std::string>* operands, const Options& options);
98+
MergeContext& merge_context, const Options& options);
9899

99100
// Update the value and return status ok,
100101
// if key exists in current memtable

db/memtablelist.cc

+2-3
Original file line numberDiff line numberDiff line change
@@ -204,10 +204,9 @@ size_t MemTableList::ApproximateMemoryUsage() {
204204
// Return the most recent value found, if any.
205205
// Operands stores the list of merge operations to apply, so far.
206206
bool MemTableList::Get(const LookupKey& key, std::string* value, Status* s,
207-
std::deque<std::string>* operands,
208-
const Options& options) {
207+
MergeContext& merge_context, const Options& options) {
209208
for (auto &memtable : memlist_) {
210-
if (memtable->Get(key, value, s, operands, options)) {
209+
if (memtable->Get(key, value, s, merge_context, options)) {
211210
return true;
212211
}
213212
}

db/memtablelist.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class MemTableList {
7878
// Search all the memtables starting from the most recent one.
7979
// Return the most recent value found, if any.
8080
bool Get(const LookupKey& key, std::string* value, Status* s,
81-
std::deque<std::string>* operands, const Options& options);
81+
MergeContext& merge_context, const Options& options);
8282

8383
// Returns the list of underlying memtables.
8484
void GetMemTables(std::vector<MemTable*>* list);

db/merge_context.h

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
2+
// This source code is licensed under the BSD-style license found in the
3+
// LICENSE file in the root directory of this source tree. An additional grant
4+
// of patent rights can be found in the PATENTS file in the same directory.
5+
//
6+
#pragma once
7+
#include "db/dbformat.h"
8+
#include "rocksdb/slice.h"
9+
#include <string>
10+
#include <deque>
11+
12+
namespace rocksdb {
13+
14+
const std::deque<std::string> empty_operand_list;
15+
16+
// The merge context for merging a user key.
17+
// When doing a Get(), DB will create such a class and pass it when
18+
// issuing Get() operation to memtables and version_set. The operands
19+
// will be fetched from the context when issuing partial of full merge.
20+
class MergeContext {
21+
public:
22+
// Clear all the operands
23+
void Clear() {
24+
if (operand_list) {
25+
operand_list->clear();
26+
}
27+
}
28+
// Replace the first two operands of merge_result, which are expected be the
29+
// merge results of them.
30+
void PushPartialMergeResult(std::string& merge_result) {
31+
assert (operand_list);
32+
operand_list->pop_front();
33+
swap(operand_list->front(), merge_result);
34+
}
35+
// Push a merge operand
36+
void PushOperand(const Slice& operand_slice) {
37+
Initialize();
38+
operand_list->push_front(operand_slice.ToString());
39+
}
40+
// return total number of operands in the list
41+
size_t GetNumOperands() const {
42+
if (!operand_list) {
43+
return 0;
44+
}
45+
return operand_list->size();
46+
}
47+
// Get the operand at the index.
48+
Slice GetOperand(int index) const {
49+
assert (operand_list);
50+
return (*operand_list)[index];
51+
}
52+
// Return all the operands.
53+
const std::deque<std::string>& GetOperands() const {
54+
if (!operand_list) {
55+
return empty_operand_list;
56+
}
57+
return *operand_list;
58+
}
59+
private:
60+
void Initialize() {
61+
if (!operand_list) {
62+
operand_list.reset(new std::deque<std::string>());
63+
}
64+
}
65+
std::unique_ptr<std::deque<std::string>> operand_list;
66+
};
67+
68+
} // namespace rocksdb
69+

db/version_set.cc

+19-15
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "db/log_reader.h"
1717
#include "db/log_writer.h"
1818
#include "db/memtable.h"
19+
#include "db/merge_context.h"
1920
#include "db/table_cache.h"
2021
#include "rocksdb/env.h"
2122
#include "rocksdb/merge_operator.h"
@@ -287,7 +288,8 @@ struct Saver {
287288
bool* value_found; // Is value set correctly? Used by KeyMayExist
288289
std::string* value;
289290
const MergeOperator* merge_operator;
290-
std::deque<std::string>* merge_operands; // the merge operations encountered
291+
// the merge operations encountered;
292+
MergeContext* merge_context;
291293
Logger* logger;
292294
bool didIO; // did we do any disk io?
293295
Statistics* statistics;
@@ -309,10 +311,10 @@ static void MarkKeyMayExist(void* arg) {
309311

310312
static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){
311313
Saver* s = reinterpret_cast<Saver*>(arg);
312-
std::deque<std::string>* const ops = s->merge_operands; // shorter alias
314+
MergeContext* merge_contex = s->merge_context;
313315
std::string merge_result; // temporary area for merge results later
314316

315-
assert(s != nullptr && ops != nullptr);
317+
assert(s != nullptr && merge_contex != nullptr);
316318

317319
ParsedInternalKey parsed_key;
318320
// TODO: didIO and Merge?
@@ -331,7 +333,8 @@ static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){
331333
} else if (kMerge == s->state) {
332334
assert(s->merge_operator != nullptr);
333335
s->state = kFound;
334-
if (!s->merge_operator->FullMerge(s->user_key, &v, *ops,
336+
if (!s->merge_operator->FullMerge(s->user_key, &v,
337+
merge_contex->GetOperands(),
335338
s->value, s->logger)) {
336339
RecordTick(s->statistics, NUMBER_MERGE_FAILURES);
337340
s->state = kCorrupt;
@@ -346,8 +349,9 @@ static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){
346349
s->state = kDeleted;
347350
} else if (kMerge == s->state) {
348351
s->state = kFound;
349-
if (!s->merge_operator->FullMerge(s->user_key, nullptr, *ops,
350-
s->value, s->logger)) {
352+
if (!s->merge_operator->FullMerge(s->user_key, nullptr,
353+
merge_contex->GetOperands(),
354+
s->value, s->logger)) {
351355
RecordTick(s->statistics, NUMBER_MERGE_FAILURES);
352356
s->state = kCorrupt;
353357
}
@@ -359,16 +363,15 @@ static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){
359363
case kTypeMerge:
360364
assert(s->state == kNotFound || s->state == kMerge);
361365
s->state = kMerge;
362-
ops->push_front(v.ToString());
363-
while (ops->size() >= 2) {
366+
merge_contex->PushOperand(v);
367+
while (merge_contex->GetNumOperands() >= 2) {
364368
// Attempt to merge operands together via user associateive merge
365369
if (s->merge_operator->PartialMerge(s->user_key,
366-
Slice((*ops)[0]),
367-
Slice((*ops)[1]),
370+
merge_contex->GetOperand(0),
371+
merge_contex->GetOperand(1),
368372
&merge_result,
369373
s->logger)) {
370-
ops->pop_front();
371-
swap(ops->front(), merge_result);
374+
merge_contex->PushPartialMergeResult(merge_result);
372375
} else {
373376
// Associative merge returns false ==> stack the operands
374377
break;
@@ -417,7 +420,7 @@ void Version::Get(const ReadOptions& options,
417420
const LookupKey& k,
418421
std::string* value,
419422
Status* status,
420-
std::deque<std::string>* operands,
423+
MergeContext* merge_context,
421424
GetStats* stats,
422425
const Options& db_options,
423426
bool* value_found) {
@@ -436,7 +439,7 @@ void Version::Get(const ReadOptions& options,
436439
saver.value_found = value_found;
437440
saver.value = value;
438441
saver.merge_operator = merge_operator;
439-
saver.merge_operands = operands;
442+
saver.merge_context = merge_context;
440443
saver.logger = logger.get();
441444
saver.didIO = false;
442445
saver.statistics = db_options.statistics.get();
@@ -557,7 +560,8 @@ void Version::Get(const ReadOptions& options,
557560
if (kMerge == saver.state) {
558561
// merge_operands are in saver and we hit the beginning of the key history
559562
// do a final merge of nullptr and operands;
560-
if (merge_operator->FullMerge(user_key, nullptr, *saver.merge_operands,
563+
if (merge_operator->FullMerge(user_key, nullptr,
564+
saver.merge_context->GetOperands(),
561565
value, logger.get())) {
562566
*status = Status::OK();
563567
} else {

0 commit comments

Comments
 (0)