Commit fc59d8f

jowlyzhang authored and facebook-github-bot committed
Add public API WriteWithCallback to support custom callbacks (facebook#12603)
Summary: This PR adds a `DB::WriteWithCallback` API that does the same thing as `DB::Write` while taking an additional `UserWriteCallback` argument to execute custom callback functions during the write. We currently support two types of callback functions: `OnWriteEnqueued` and `OnWalWriteFinish`. The former is invoked after the write is enqueued, and the latter is invoked after the WAL write finishes, when applicable. These callback functions are intended to help users improve synchronization between concurrent writes. They execute on the write's critical path, so they will impact write latency if not used properly; the documentation for the callback interface mentions this and suggests that users keep their implementations minimal.

Although the transaction interfaces' writes do not yet allow users to specify such a user write callback argument, the `DBImpl::Write*` family of APIs does not differentiate between regular DB writes and writes coming from the transaction layer when it comes to supporting this `UserWriteCallback`. These callbacks work for all the write modes, including the default write mode, Options.two_write_queues, Options.unordered_write, and Options.enable_pipelined_write.

Pull Request resolved: facebook#12603

Test Plan: Added unit test in ./write_callback_test

Reviewed By: anand1976

Differential Revision: D58044638

Pulled By: jowlyzhang

fbshipit-source-id: 87a84a0221df8f589ec8fc4d74597e72ce97e4cd
1 parent f3b7e95 commit fc59d8f

12 files changed: +209 -64 lines
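
For context, here is a minimal usage sketch of the new public API, modeled on the test added in db/write_callback_test.cc below. The database path, options, and the FlagUserWriteCallback class name are illustrative assumptions, not part of the commit:

// Minimal usage sketch of the new DB::WriteWithCallback API, modeled on the
// test added in db/write_callback_test.cc. The db path, options, and the
// FlagUserWriteCallback name are illustrative, not part of this commit.
#include <atomic>
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/user_write_callback.h"
#include "rocksdb/write_batch.h"

using namespace ROCKSDB_NAMESPACE;

// Keep the callback bodies minimal: they run on the write's critical path.
class FlagUserWriteCallback : public UserWriteCallback {
 public:
  std::atomic<bool> enqueued{false};
  std::atomic<bool> wal_done{false};
  void OnWriteEnqueued() override { enqueued.store(true); }
  void OnWalWriteFinish() override { wal_done.store(true); }
};

int main() {
  DB* db = nullptr;
  Options options;
  options.create_if_missing = true;
  Status s = DB::Open(options, "/tmp/write_with_callback_demo", &db);
  assert(s.ok());

  WriteBatch batch;
  s = batch.Put("a", "value.a");
  assert(s.ok());

  FlagUserWriteCallback cb;
  // Same semantics as DB::Write, plus the two callback hooks.
  s = db->WriteWithCallback(WriteOptions(), &batch, &cb);
  assert(s.ok());
  assert(cb.enqueued.load());
  // WAL is enabled by default, so OnWalWriteFinish should have fired.
  assert(cb.wal_done.load());

  delete db;
  return 0;
}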

db/db_impl/db_impl.h (+11 -2)

@@ -57,6 +57,7 @@
 #include "rocksdb/status.h"
 #include "rocksdb/trace_reader_writer.h"
 #include "rocksdb/transaction_log.h"
+#include "rocksdb/user_write_callback.h"
 #include "rocksdb/utilities/replayer.h"
 #include "rocksdb/write_buffer_manager.h"
 #include "table/merging_iterator.h"
@@ -231,6 +232,10 @@ class DBImpl : public DB {
   using DB::Write;
   Status Write(const WriteOptions& options, WriteBatch* updates) override;
 
+  using DB::WriteWithCallback;
+  Status WriteWithCallback(const WriteOptions& options, WriteBatch* updates,
+                           UserWriteCallback* user_write_cb) override;
+
   using DB::Get;
   Status Get(const ReadOptions& _read_options,
              ColumnFamilyHandle* column_family, const Slice& key,
@@ -688,7 +693,8 @@ class DBImpl : public DB {
   // thread to determine whether it is safe to perform the write.
   virtual Status WriteWithCallback(const WriteOptions& write_options,
                                    WriteBatch* my_batch,
-                                   WriteCallback* callback);
+                                   WriteCallback* callback,
+                                   UserWriteCallback* user_write_cb = nullptr);
 
   // Returns the sequence number that is guaranteed to be smaller than or equal
   // to the sequence number of any key that could be inserted into the current
@@ -1497,6 +1503,7 @@ class DBImpl : public DB {
   // batch that does not have duplicate keys.
   Status WriteImpl(const WriteOptions& options, WriteBatch* updates,
                    WriteCallback* callback = nullptr,
+                   UserWriteCallback* user_write_cb = nullptr,
                    uint64_t* log_used = nullptr, uint64_t log_ref = 0,
                    bool disable_memtable = false, uint64_t* seq_used = nullptr,
                    size_t batch_cnt = 0,
@@ -1505,6 +1512,7 @@
 
   Status PipelinedWriteImpl(const WriteOptions& options, WriteBatch* updates,
                             WriteCallback* callback = nullptr,
+                            UserWriteCallback* user_write_cb = nullptr,
                             uint64_t* log_used = nullptr, uint64_t log_ref = 0,
                             bool disable_memtable = false,
                             uint64_t* seq_used = nullptr);
@@ -1531,7 +1539,8 @@ class DBImpl : public DB {
   // marks start of a new sub-batch.
   Status WriteImplWALOnly(
       WriteThread* write_thread, const WriteOptions& options,
-      WriteBatch* updates, WriteCallback* callback, uint64_t* log_used,
+      WriteBatch* updates, WriteCallback* callback,
+      UserWriteCallback* user_write_cb, uint64_t* log_used,
      const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
      PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
      const PublishLastSeq publish_last_seq, const bool disable_memtable);
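
The new public header rocksdb/user_write_callback.h is among the changed files but is not shown in this excerpt. Judging from the summary and from MockUserWriteCallback in the test diff below, its interface is presumably an abstract class along these lines (a sketch, not the verbatim header):

// Sketch of the UserWriteCallback interface implied by the summary and by
// MockUserWriteCallback in db/write_callback_test.cc; the actual header in
// include/rocksdb/user_write_callback.h may differ in comments and details.
#pragma once

#include "rocksdb/rocksdb_namespace.h"

namespace ROCKSDB_NAMESPACE {

class UserWriteCallback {
 public:
  virtual ~UserWriteCallback() {}

  // Invoked after the write is enqueued onto the write queue.
  virtual void OnWriteEnqueued() = 0;

  // Invoked after the WAL write finishes, when applicable
  // (i.e. when the write is not configured to skip the WAL).
  virtual void OnWalWriteFinish() = 0;
};

}  // namespace ROCKSDB_NAMESPACE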

db/db_impl/db_impl_write.cc (+52 -23)

@@ -155,21 +155,36 @@ Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) {
   }
   if (s.ok()) {
     s = WriteImpl(write_options, my_batch, /*callback=*/nullptr,
+                  /*user_write_cb=*/nullptr,
                   /*log_used=*/nullptr);
   }
   return s;
 }
 
+Status DBImpl::WriteWithCallback(const WriteOptions& write_options,
+                                 WriteBatch* my_batch, WriteCallback* callback,
+                                 UserWriteCallback* user_write_cb) {
+  Status s;
+  if (write_options.protection_bytes_per_key > 0) {
+    s = WriteBatchInternal::UpdateProtectionInfo(
+        my_batch, write_options.protection_bytes_per_key);
+  }
+  if (s.ok()) {
+    s = WriteImpl(write_options, my_batch, callback, user_write_cb);
+  }
+  return s;
+}
+
 Status DBImpl::WriteWithCallback(const WriteOptions& write_options,
                                  WriteBatch* my_batch,
-                                 WriteCallback* callback) {
+                                 UserWriteCallback* user_write_cb) {
   Status s;
   if (write_options.protection_bytes_per_key > 0) {
     s = WriteBatchInternal::UpdateProtectionInfo(
         my_batch, write_options.protection_bytes_per_key);
   }
   if (s.ok()) {
-    s = WriteImpl(write_options, my_batch, callback, nullptr);
+    s = WriteImpl(write_options, my_batch, /*callback=*/nullptr, user_write_cb);
   }
   return s;
 }
@@ -179,9 +194,9 @@ Status DBImpl::WriteWithCallback(const WriteOptions& write_options,
 // published sequence.
 Status DBImpl::WriteImpl(const WriteOptions& write_options,
                          WriteBatch* my_batch, WriteCallback* callback,
-                         uint64_t* log_used, uint64_t log_ref,
-                         bool disable_memtable, uint64_t* seq_used,
-                         size_t batch_cnt,
+                         UserWriteCallback* user_write_cb, uint64_t* log_used,
+                         uint64_t log_ref, bool disable_memtable,
+                         uint64_t* seq_used, size_t batch_cnt,
                          PreReleaseCallback* pre_release_callback,
                          PostMemTableCallback* post_memtable_callback) {
   assert(!seq_per_batch_ || batch_cnt != 0);
@@ -288,10 +303,10 @@
         seq_per_batch_ ? kDoAssignOrder : kDontAssignOrder;
     // Otherwise it is WAL-only Prepare batches in WriteCommitted policy and
     // they don't consume sequence.
-    return WriteImplWALOnly(&nonmem_write_thread_, write_options, my_batch,
-                            callback, log_used, log_ref, seq_used, batch_cnt,
-                            pre_release_callback, assign_order,
-                            kDontPublishLastSeq, disable_memtable);
+    return WriteImplWALOnly(
+        &nonmem_write_thread_, write_options, my_batch, callback, user_write_cb,
+        log_used, log_ref, seq_used, batch_cnt, pre_release_callback,
+        assign_order, kDontPublishLastSeq, disable_memtable);
   }
 
   if (immutable_db_options_.unordered_write) {
@@ -303,9 +318,9 @@
     // Use a write thread to i) optimize for WAL write, ii) publish last
     // sequence in in increasing order, iii) call pre_release_callback serially
     Status status = WriteImplWALOnly(
-        &write_thread_, write_options, my_batch, callback, log_used, log_ref,
-        &seq, sub_batch_cnt, pre_release_callback, kDoAssignOrder,
-        kDoPublishLastSeq, disable_memtable);
+        &write_thread_, write_options, my_batch, callback, user_write_cb,
+        log_used, log_ref, &seq, sub_batch_cnt, pre_release_callback,
+        kDoAssignOrder, kDoPublishLastSeq, disable_memtable);
     TEST_SYNC_POINT("DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL");
     if (!status.ok()) {
       return status;
@@ -322,14 +337,14 @@
   }
 
   if (immutable_db_options_.enable_pipelined_write) {
-    return PipelinedWriteImpl(write_options, my_batch, callback, log_used,
-                              log_ref, disable_memtable, seq_used);
+    return PipelinedWriteImpl(write_options, my_batch, callback, user_write_cb,
+                              log_used, log_ref, disable_memtable, seq_used);
   }
 
   PERF_TIMER_GUARD(write_pre_and_post_process_time);
-  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
-                        disable_memtable, batch_cnt, pre_release_callback,
-                        post_memtable_callback);
+  WriteThread::Writer w(write_options, my_batch, callback, user_write_cb,
+                        log_ref, disable_memtable, batch_cnt,
+                        pre_release_callback, post_memtable_callback);
   StopWatch write_sw(immutable_db_options_.clock, stats_, DB_WRITE);
 
   write_thread_.JoinBatchGroup(&w);
@@ -686,15 +701,16 @@
 
 Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
                                   WriteBatch* my_batch, WriteCallback* callback,
+                                  UserWriteCallback* user_write_cb,
                                   uint64_t* log_used, uint64_t log_ref,
                                   bool disable_memtable, uint64_t* seq_used) {
   PERF_TIMER_GUARD(write_pre_and_post_process_time);
   StopWatch write_sw(immutable_db_options_.clock, stats_, DB_WRITE);
 
   WriteContext write_context;
 
-  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
-                        disable_memtable, /*_batch_cnt=*/0,
+  WriteThread::Writer w(write_options, my_batch, callback, user_write_cb,
+                        log_ref, disable_memtable, /*_batch_cnt=*/0,
                         /*_pre_release_callback=*/nullptr);
   write_thread_.JoinBatchGroup(&w);
   TEST_SYNC_POINT("DBImplWrite::PipelinedWriteImpl:AfterJoinBatchGroup");
@@ -875,7 +891,8 @@ Status DBImpl::UnorderedWriteMemtable(const WriteOptions& write_options,
   PERF_TIMER_GUARD(write_pre_and_post_process_time);
   StopWatch write_sw(immutable_db_options_.clock, stats_, DB_WRITE);
 
-  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
+  WriteThread::Writer w(write_options, my_batch, callback,
+                        /*user_write_cb=*/nullptr, log_ref,
                         false /*disable_memtable*/);
 
   if (w.CheckCallback(this) && w.ShouldWriteToMemtable()) {
@@ -925,13 +942,15 @@
 // applicable in a two-queue setting.
 Status DBImpl::WriteImplWALOnly(
     WriteThread* write_thread, const WriteOptions& write_options,
-    WriteBatch* my_batch, WriteCallback* callback, uint64_t* log_used,
+    WriteBatch* my_batch, WriteCallback* callback,
+    UserWriteCallback* user_write_cb, uint64_t* log_used,
     const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
     PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
     const PublishLastSeq publish_last_seq, const bool disable_memtable) {
   PERF_TIMER_GUARD(write_pre_and_post_process_time);
-  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
-                        disable_memtable, sub_batch_cnt, pre_release_callback);
+  WriteThread::Writer w(write_options, my_batch, callback, user_write_cb,
+                        log_ref, disable_memtable, sub_batch_cnt,
+                        pre_release_callback);
   StopWatch write_sw(immutable_db_options_.clock, stats_, DB_WRITE);
 
   write_thread->JoinBatchGroup(&w);
@@ -1498,6 +1517,11 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
     RecordTick(stats_, WAL_FILE_BYTES, log_size);
     stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal);
     RecordTick(stats_, WRITE_WITH_WAL, write_with_wal);
+    for (auto* writer : write_group) {
+      if (!writer->CallbackFailed()) {
+        writer->CheckPostWalWriteCallback();
+      }
+    }
   }
   return io_s;
 }
@@ -1562,6 +1586,11 @@ IOStatus DBImpl::ConcurrentWriteToWAL(
     stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal,
                       concurrent);
     RecordTick(stats_, WRITE_WITH_WAL, write_with_wal);
+    for (auto* writer : write_group) {
+      if (!writer->CallbackFailed()) {
+        writer->CheckPostWalWriteCallback();
+      }
+    }
   }
   return io_s;
 }

db/write_callback_test.cc (+46 -5)

@@ -2,8 +2,6 @@
 // This source code is licensed under both the GPLv2 (found in the
 // COPYING file in the root directory) and Apache 2.0 License
 // (found in the LICENSE.Apache file in the root directory).
-
-
 #include "db/write_callback.h"
 
 #include <atomic>
@@ -15,6 +13,7 @@
 #include "db/db_impl/db_impl.h"
 #include "port/port.h"
 #include "rocksdb/db.h"
+#include "rocksdb/user_write_callback.h"
 #include "rocksdb/write_batch.h"
 #include "test_util/sync_point.h"
 #include "test_util/testharness.h"
@@ -84,6 +83,28 @@ class MockWriteCallback : public WriteCallback {
   bool AllowWriteBatching() override { return allow_batching_; }
 };
 
+class MockUserWriteCallback : public UserWriteCallback {
+ public:
+  std::atomic<bool> write_enqueued_{false};
+  std::atomic<bool> wal_write_done_{false};
+
+  MockUserWriteCallback() = default;
+
+  MockUserWriteCallback(const MockUserWriteCallback& other) {
+    write_enqueued_.store(other.write_enqueued_.load());
+    wal_write_done_.store(other.wal_write_done_.load());
+  }
+
+  void OnWriteEnqueued() override { write_enqueued_.store(true); }
+
+  void OnWalWriteFinish() override { wal_write_done_.store(true); }
+
+  void Reset() {
+    write_enqueued_.store(false);
+    wal_write_done_.store(false);
+  }
+};
+
 #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
 class WriteCallbackPTest
     : public WriteCallbackTest,
@@ -119,9 +140,11 @@ TEST_P(WriteCallbackPTest, WriteWithCallbackTest) {
     kvs_.clear();
     write_batch_.Clear();
     callback_.was_called_.store(false);
+    user_write_cb_.Reset();
   }
 
   MockWriteCallback callback_;
+  MockUserWriteCallback user_write_cb_;
   WriteBatch write_batch_;
   std::vector<std::pair<string, string>> kvs_;
 };
@@ -327,18 +350,26 @@ TEST_P(WriteCallbackPTest, WriteWithCallbackTest) {
         ASSERT_OK(WriteBatchInternal::InsertNoop(&write_op.write_batch_));
         const size_t ONE_BATCH = 1;
         s = db_impl->WriteImpl(woptions, &write_op.write_batch_,
-                               &write_op.callback_, nullptr, 0, false, nullptr,
-                               ONE_BATCH,
+                               &write_op.callback_, &write_op.user_write_cb_,
+                               nullptr, 0, false, nullptr, ONE_BATCH,
                                two_queues_ ? &publish_seq_callback : nullptr);
       } else {
         s = db_impl->WriteWithCallback(woptions, &write_op.write_batch_,
-                                       &write_op.callback_);
+                                       &write_op.callback_,
+                                       &write_op.user_write_cb_);
       }
 
+      ASSERT_TRUE(write_op.user_write_cb_.write_enqueued_.load());
      if (write_op.callback_.should_fail_) {
         ASSERT_TRUE(s.IsBusy());
+        ASSERT_FALSE(write_op.user_write_cb_.wal_write_done_.load());
      } else {
         ASSERT_OK(s);
+        if (enable_WAL_) {
+          ASSERT_TRUE(write_op.user_write_cb_.wal_write_done_.load());
+        } else {
+          ASSERT_FALSE(write_op.user_write_cb_.wal_write_done_.load());
+        }
      }
    };
 
@@ -440,6 +471,16 @@ TEST_F(WriteCallbackTest, WriteCallBackTest) {
   ASSERT_OK(s);
   ASSERT_EQ("value.a2", value);
 
+  MockUserWriteCallback user_write_cb;
+  WriteBatch wb4;
+  ASSERT_OK(wb4.Put("a", "value.a4"));
+
+  ASSERT_OK(db->WriteWithCallback(write_options, &wb4, &user_write_cb));
+  ASSERT_OK(db->Get(read_options, "a", &value));
+  ASSERT_EQ(value, "value.a4");
+  ASSERT_TRUE(user_write_cb.write_enqueued_.load());
+  ASSERT_TRUE(user_write_cb.wal_write_done_.load());
+
   delete db;
   ASSERT_OK(DestroyDB(dbname, options));
 }

db/write_thread.cc (+2)

@@ -404,6 +404,8 @@ void WriteThread::JoinBatchGroup(Writer* w) {
 
   bool linked_as_leader = LinkOne(w, &newest_writer_);
 
+  w->CheckWriteEnqueuedCallback();
+
   if (linked_as_leader) {
     SetState(w, STATE_GROUP_LEADER);
   }
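
The matching WriteThread::Writer changes (the extra constructor argument and the Check*Callback helpers used above) live in db/write_thread.h, which is part of this commit but not shown in this excerpt. A plausible sketch of those helpers, assuming the Writer simply forwards to its stored user_write_cb when one is set:

// Hypothetical sketch of the Writer-side helpers referenced in the diffs
// above; the actual definitions are in db/write_thread.h (not shown here).
struct Writer {
  // ... existing Writer members ...
  UserWriteCallback* user_write_cb = nullptr;

  void CheckWriteEnqueuedCallback() {
    // Invoked from JoinBatchGroup() right after the writer is linked
    // onto the write queue.
    if (user_write_cb != nullptr) {
      user_write_cb->OnWriteEnqueued();
    }
  }

  void CheckPostWalWriteCallback() {
    // Invoked from WriteToWAL()/ConcurrentWriteToWAL() after the WAL write,
    // and skipped when the WriteCallback reported a failure.
    if (user_write_cb != nullptr) {
      user_write_cb->OnWalWriteFinish();
    }
  }
};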
