Skip to content

Commit 3ce8d9a

Browse files
committed
Add support for plain table format to sst_dump.
Summary: This diff enables the command line tool `sst_dump` to work for sst files under plain table format. Changes include: * In tools/sst_dump.cc: - add support for plain table format - display prefix_extractor information when --show_properties is on * In table/format.cc - Now the table magic number of a Footer can be later initialized via ReadFooterFromFile(). * In table/meta_blocks: - add function ReadTableMagicNumber() that reads the magic number of the specified file. Minor fixes: - remove a duplicate #include in table/table_test.cc - fix a commentary typo in include/rocksdb/memtablerep.h - fix lint errors. Test Plan: Runs sst_dump with both block-based and plain-table format files with different arguments, specifically those with --show_properties and --from. * sample output: https://reviews.facebook.net/P261 Reviewers: kailiu, sdong, xjin CC: leveldb Differential Revision: https://reviews.facebook.net/D15903
1 parent 1560bb9 commit 3ce8d9a

File tree

8 files changed

+242
-194
lines changed

8 files changed

+242
-194
lines changed

include/rocksdb/memtablerep.h

+9-8
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
// types built in:
2222
// - SkipListRep: This is the default; it is backed by a skip list.
2323
// - HashSkipListRep: The memtable rep that is best used for keys that are
24-
// structured like "prefix:suffix" where iteration withing a prefix is
24+
// structured like "prefix:suffix" where iteration within a prefix is
2525
// common and iteration across different prefixes is rare. It is backed by
2626
// a hash map where each bucket is a skip list.
2727
// - VectorRep: This is backed by an unordered std::vector. On iteration, the
@@ -85,7 +85,7 @@ class MemTableRep {
8585
// Initialize an iterator over the specified collection.
8686
// The returned iterator is not valid.
8787
// explicit Iterator(const MemTableRep* collection);
88-
virtual ~Iterator() { };
88+
virtual ~Iterator() {}
8989

9090
// Returns true iff the iterator is positioned at a valid node.
9191
virtual bool Valid() const = 0;
@@ -143,7 +143,7 @@ class MemTableRep {
143143
// new MemTableRep objects
144144
class MemTableRepFactory {
145145
public:
146-
virtual ~MemTableRepFactory() { };
146+
virtual ~MemTableRepFactory() {}
147147
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
148148
Arena*) = 0;
149149
virtual const char* Name() const = 0;
@@ -159,7 +159,8 @@ class MemTableRepFactory {
159159
// bytes reserved for usage.
160160
class VectorRepFactory : public MemTableRepFactory {
161161
const size_t count_;
162-
public:
162+
163+
public:
163164
explicit VectorRepFactory(size_t count = 0) : count_(count) { }
164165
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
165166
Arena*) override;
@@ -170,9 +171,9 @@ class VectorRepFactory : public MemTableRepFactory {
170171

171172
// This uses a skip list to store keys. It is the default.
172173
class SkipListFactory : public MemTableRepFactory {
173-
public:
174-
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
175-
Arena*) override;
174+
public:
175+
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
176+
Arena*) override;
176177
virtual const char* Name() const override {
177178
return "SkipListFactory";
178179
}
@@ -196,4 +197,4 @@ extern MemTableRepFactory* NewHashSkipListRepFactory(
196197
extern MemTableRepFactory* NewHashLinkListRepFactory(
197198
const SliceTransform* transform, size_t bucket_count = 50000);
198199

199-
}
200+
} // namespace rocksdb

table/format.cc

+16-6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
#include "table/format.h"
1111

12+
#include <string>
13+
1214
#include "port/port.h"
1315
#include "rocksdb/env.h"
1416
#include "table/block.h"
@@ -43,22 +45,30 @@ void Footer::EncodeTo(std::string* dst) const {
4345
metaindex_handle_.EncodeTo(dst);
4446
index_handle_.EncodeTo(dst);
4547
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
46-
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu));
47-
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
48+
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() & 0xffffffffu));
49+
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() >> 32));
4850
assert(dst->size() == original_size + kEncodedLength);
4951
}
5052

5153
Status Footer::DecodeFrom(Slice* input) {
5254
assert(input != nullptr);
5355
assert(input->size() >= kEncodedLength);
5456

55-
const char* magic_ptr = input->data() + kEncodedLength - 8;
57+
const char* magic_ptr =
58+
input->data() + kEncodedLength - kMagicNumberLengthByte;
5659
const uint32_t magic_lo = DecodeFixed32(magic_ptr);
5760
const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4);
5861
const uint64_t magic = ((static_cast<uint64_t>(magic_hi) << 32) |
5962
(static_cast<uint64_t>(magic_lo)));
60-
if (magic != kTableMagicNumber) {
61-
return Status::InvalidArgument("not an sstable (bad magic number)");
63+
if (HasInitializedTableMagicNumber()) {
64+
if (magic != table_magic_number()) {
65+
char buffer[80];
66+
snprintf(buffer, sizeof(buffer) - 1,
67+
"not an sstable (bad magic number --- %lx)", magic);
68+
return Status::InvalidArgument(buffer);
69+
}
70+
} else {
71+
set_table_magic_number(magic);
6272
}
6373

6474
Status result = metaindex_handle_.DecodeFrom(input);
@@ -221,7 +231,7 @@ Status UncompressBlockContents(const char* data, size_t n,
221231
default:
222232
return Status::Corruption("bad block type");
223233
}
224-
result->compression_type = kNoCompression; // not compressed any more
234+
result->compression_type = kNoCompression; // not compressed any more
225235
return Status::OK();
226236
}
227237

table/format.h

+40-5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ class Block;
2121
class RandomAccessFile;
2222
struct ReadOptions;
2323

24+
// the length of the magic number in bytes.
25+
const int kMagicNumberLengthByte = 8;
26+
2427
// BlockHandle is a pointer to the extent of a file that stores a data
2528
// block or a meta block.
2629
class BlockHandle {
@@ -63,12 +66,16 @@ class BlockHandle {
6366
// end of every table file.
6467
class Footer {
6568
public:
69+
// Constructs a footer without specifying its table magic number.
70+
// In such case, the table magic number of such footer should be
71+
// initialized via @ReadFooterFromFile().
72+
Footer() : Footer(kInvalidTableMagicNumber) {}
73+
6674
// @table_magic_number serves two purposes:
6775
// 1. Identify different types of the tables.
6876
// 2. Help us to identify if a given file is a valid sst.
69-
Footer(uint64_t table_magic_number) :
70-
kTableMagicNumber(table_magic_number) {
71-
}
77+
explicit Footer(uint64_t table_magic_number)
78+
: table_magic_number_(table_magic_number) {}
7279

7380
// The block handle for the metaindex block of the table
7481
const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
@@ -78,24 +85,52 @@ class Footer {
7885
const BlockHandle& index_handle() const {
7986
return index_handle_;
8087
}
88+
8189
void set_index_handle(const BlockHandle& h) {
8290
index_handle_ = h;
8391
}
8492

93+
uint64_t table_magic_number() const { return table_magic_number_; }
94+
8595
void EncodeTo(std::string* dst) const;
96+
97+
// Set the current footer based on the input slice. If table_magic_number_
98+
// is not set (i.e., HasInitializedTableMagicNumber() is false), then this
99+
// function will also initialize table_magic_number_. Otherwise, this
100+
// function will verify whether the magic number specified in the input
101+
// slice matches table_magic_number_ and update the current footer only
102+
// when the test passes.
86103
Status DecodeFrom(Slice* input);
87104

88105
// Encoded length of a Footer. Note that the serialization of a
89106
// Footer will always occupy exactly this many bytes. It consists
90107
// of two block handles and a magic number.
91108
enum {
92-
kEncodedLength = 2*BlockHandle::kMaxEncodedLength + 8
109+
kEncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8
93110
};
94111

112+
const uint64_t kInvalidTableMagicNumber = 0;
113+
95114
private:
115+
// Set the table_magic_number only when it was not previously
116+
// initialized. Return true on success.
117+
bool set_table_magic_number(uint64_t magic_number) {
118+
if (HasInitializedTableMagicNumber()) {
119+
table_magic_number_ = magic_number;
120+
return true;
121+
}
122+
return false;
123+
}
124+
125+
// return true if @table_magic_number_ is set to a value different
126+
// from @kInvalidTableMagicNumber.
127+
bool HasInitializedTableMagicNumber() const {
128+
return (table_magic_number_ != kInvalidTableMagicNumber);
129+
}
130+
96131
BlockHandle metaindex_handle_;
97132
BlockHandle index_handle_;
98-
const uint64_t kTableMagicNumber;
133+
uint64_t table_magic_number_;
99134
};
100135

101136
// Read the footer from file

table/meta_blocks.cc

+52-53
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
// This source code is licensed under the BSD-style license found in the
33
// LICENSE file in the root directory of this source tree. An additional grant
44
// of patent rights can be found in the PATENTS file in the same directory.
5-
65
#include "table/meta_blocks.h"
76

87
#include <map>
8+
#include <string>
99

1010
#include "rocksdb/table.h"
11+
#include "rocksdb/table_properties.h"
1112
#include "table/block.h"
1213
#include "table/format.h"
1314
#include "util/coding.h"
@@ -104,9 +105,8 @@ bool NotifyCollectTableCollectorsOnAdd(
104105
Status s = collector->Add(key, value);
105106
all_succeeded = all_succeeded && s.ok();
106107
if (!s.ok()) {
107-
LogPropertiesCollectionError(
108-
info_log, "Add", /* method */ collector->Name()
109-
);
108+
LogPropertiesCollectionError(info_log, "Add" /* method */,
109+
collector->Name());
110110
}
111111
}
112112
return all_succeeded;
@@ -123,9 +123,8 @@ bool NotifyCollectTableCollectorsOnFinish(
123123

124124
all_succeeded = all_succeeded && s.ok();
125125
if (!s.ok()) {
126-
LogPropertiesCollectionError(
127-
info_log, "Finish", /* method */ collector->Name()
128-
);
126+
LogPropertiesCollectionError(info_log, "Finish" /* method */,
127+
collector->Name());
129128
} else {
130129
builder->Add(user_collected_properties);
131130
}
@@ -151,37 +150,29 @@ Status ReadProperties(
151150
BlockContents block_contents;
152151
ReadOptions read_options;
153152
read_options.verify_checksums = false;
154-
Status s = ReadBlockContents(
155-
file,
156-
read_options,
157-
handle,
158-
&block_contents,
159-
env,
160-
false
161-
);
153+
Status s = ReadBlockContents(file, read_options, handle, &block_contents, env,
154+
false);
162155

163156
if (!s.ok()) {
164157
return s;
165158
}
166159

167160
Block properties_block(block_contents);
168161
std::unique_ptr<Iterator> iter(
169-
properties_block.NewIterator(BytewiseComparator())
170-
);
162+
properties_block.NewIterator(BytewiseComparator()));
171163

172164
// All pre-defined properties of type uint64_t
173165
std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
174-
{ TablePropertiesNames::kDataSize, &table_properties->data_size },
175-
{ TablePropertiesNames::kIndexSize, &table_properties->index_size },
176-
{ TablePropertiesNames::kFilterSize, &table_properties->filter_size },
177-
{ TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size },
178-
{ TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size },
179-
{ TablePropertiesNames::kNumDataBlocks,
180-
&table_properties->num_data_blocks },
181-
{ TablePropertiesNames::kNumEntries, &table_properties->num_entries },
182-
{ TablePropertiesNames::kFormatVersion, &table_properties->format_version },
183-
{ TablePropertiesNames::kFixedKeyLen, &table_properties->fixed_key_len },
184-
};
166+
{TablePropertiesNames::kDataSize, &table_properties->data_size},
167+
{TablePropertiesNames::kIndexSize, &table_properties->index_size},
168+
{TablePropertiesNames::kFilterSize, &table_properties->filter_size},
169+
{TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size},
170+
{TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size},
171+
{TablePropertiesNames::kNumDataBlocks,
172+
&table_properties->num_data_blocks},
173+
{TablePropertiesNames::kNumEntries, &table_properties->num_entries},
174+
{TablePropertiesNames::kFormatVersion, &table_properties->format_version},
175+
{TablePropertiesNames::kFixedKeyLen, &table_properties->fixed_key_len}};
185176

186177
std::string last_key;
187178
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@@ -192,10 +183,8 @@ Status ReadProperties(
192183

193184
auto key = iter->key().ToString();
194185
// properties block is strictly sorted with no duplicate key.
195-
assert(
196-
last_key.empty() ||
197-
BytewiseComparator()->Compare(key, last_key) > 0
198-
);
186+
assert(last_key.empty() ||
187+
BytewiseComparator()->Compare(key, last_key) > 0);
199188
last_key = key;
200189

201190
auto raw_val = iter->value();
@@ -218,8 +207,7 @@ Status ReadProperties(
218207
} else {
219208
// handle user-collected properties
220209
table_properties->user_collected_properties.insert(
221-
std::make_pair(key, raw_val.ToString())
222-
);
210+
{key, raw_val.ToString()});
223211
}
224212
}
225213

@@ -244,43 +232,54 @@ Status ReadTableProperties(
244232
BlockContents metaindex_contents;
245233
ReadOptions read_options;
246234
read_options.verify_checksums = false;
247-
s = ReadBlockContents(
248-
file,
249-
read_options,
250-
metaindex_handle,
251-
&metaindex_contents,
252-
env,
253-
false
254-
);
235+
s = ReadBlockContents(file, read_options, metaindex_handle,
236+
&metaindex_contents, env, false);
255237
if (!s.ok()) {
256238
return s;
257239
}
258240
Block metaindex_block(metaindex_contents);
259241
std::unique_ptr<Iterator> meta_iter(
260-
metaindex_block.NewIterator(BytewiseComparator())
261-
);
242+
metaindex_block.NewIterator(BytewiseComparator()));
262243

263244
// -- Read property block
264245
meta_iter->Seek(kPropertiesBlock);
265246
TableProperties table_properties;
266247
if (meta_iter->Valid() &&
267248
meta_iter->key() == kPropertiesBlock &&
268249
meta_iter->status().ok()) {
269-
s = ReadProperties(
270-
meta_iter->value(),
271-
file,
272-
env,
273-
info_log,
274-
properties
275-
);
250+
s = ReadProperties(meta_iter->value(), file, env, info_log, properties);
276251
} else {
277252
s = Status::Corruption(
278-
"Unable to read the property block from the plain table"
279-
);
253+
"Unable to read the property block from the plain table");
280254
}
281255

282256
return s;
283257
}
284258

259+
// Opens the file at @file_path, reads its footer, and stores the footer's
// table magic number in @*table_magic_number.
//
// @options.env is used both to open the file and to query its size.
// Returns:
//   - the error from NewRandomAccessFile() / GetFileSize() if either fails,
//   - Status::InvalidArgument if the file is shorter than an encoded footer,
//   - the error from ReadFooterFromFile() if the footer cannot be read,
//   - Status::OK() otherwise, in which case @*table_magic_number is set.
// @*table_magic_number is left unmodified on any failure.
Status ReadTableMagicNumber(const std::string& file_path,
                            const Options& options,
                            const EnvOptions& env_options,
                            uint64_t* table_magic_number) {
  unique_ptr<RandomAccessFile> file;
  Status s = options.env->NewRandomAccessFile(file_path, &file, env_options);
  if (!s.ok()) {
    return s;
  }

  // Do not ignore the status of GetFileSize(): on failure file_size would
  // otherwise be used while holding an indeterminate value.
  uint64_t file_size = 0;
  s = options.env->GetFileSize(file_path, &file_size);
  if (!s.ok()) {
    return s;
  }
  if (file_size < Footer::kEncodedLength) {
    return Status::InvalidArgument("file is too short to be an sstable");
  }

  // A default-constructed Footer carries no magic number yet, so the value
  // it reports afterwards is the one read from the file.
  Footer footer;
  s = ReadFooterFromFile(file.get(), file_size, &footer);
  if (!s.ok()) {
    return s;
  }

  *table_magic_number = footer.table_magic_number();
  return Status::OK();
}
285284

286285
} // namespace rocksdb

0 commit comments

Comments
 (0)