From acfcbb394d2b7af65f6416bd510fc62a2ca9978f Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 6 Nov 2024 10:01:24 +0100 Subject: [PATCH] SnapshotBundle uses schema --- cmd/capi/execute.cpp | 44 ++++---- cmd/dev/snapshots.cpp | 8 +- silkworm/capi/silkworm.cpp | 28 +++-- silkworm/db/blocks/schema_config.hpp | 14 +++ .../db/blocks/transactions/txn_queries.hpp | 9 +- .../snapshots/common/snapshot_path.cpp | 45 +++++--- .../snapshots/common/snapshot_path.hpp | 1 + .../datastore/snapshots/snapshot_bundle.cpp | 62 +++++++---- .../datastore/snapshots/snapshot_bundle.hpp | 102 ++++-------------- .../snapshots/snapshot_bundle_factory.hpp | 2 - .../snapshots/snapshot_repository.cpp | 80 +++++++------- .../snapshots/snapshot_repository.hpp | 3 +- silkworm/db/snapshot_bundle_factory_impl.cpp | 26 +---- silkworm/db/snapshot_bundle_factory_impl.hpp | 1 - silkworm/db/snapshot_sync.cpp | 8 +- silkworm/db/snapshot_sync_test.cpp | 22 +--- 16 files changed, 209 insertions(+), 246 deletions(-) diff --git a/cmd/capi/execute.cpp b/cmd/capi/execute.cpp index 888ca8e56d..5bd8f5d1d6 100644 --- a/cmd/capi/execute.cpp +++ b/cmd/capi/execute.cpp @@ -151,19 +151,19 @@ std::vector collect_all_snapshots(const SnapshotRepositor std::vector transactions_snapshot_sequence; for (const auto& bundle_ptr : repository.view_bundles()) { - const auto& bundle = *bundle_ptr; + db::blocks::BundleDataRef bundle{**bundle_ptr}; { { SilkwormHeadersSnapshot raw_headers_snapshot{ .segment{ - .file_path = make_path(bundle.header_segment.path()), - .memory_address = bundle.header_segment.memory_file_region().data(), - .memory_length = bundle.header_segment.memory_file_region().size(), + .file_path = make_path(bundle.header_segment().path()), + .memory_address = bundle.header_segment().memory_file_region().data(), + .memory_length = bundle.header_segment().memory_file_region().size(), }, .header_hash_index{ - .file_path = make_path(bundle.idx_header_hash.path()), - .memory_address = bundle.idx_header_hash.memory_file_region().data(), - .memory_length = bundle.idx_header_hash.memory_file_region().size(), + .file_path = make_path(bundle.idx_header_hash().path()), + .memory_address = bundle.idx_header_hash().memory_file_region().data(), + .memory_length = bundle.idx_header_hash().memory_file_region().size(), }, }; headers_snapshot_sequence.push_back(raw_headers_snapshot); @@ -171,14 +171,14 @@ std::vector collect_all_snapshots(const SnapshotRepositor { SilkwormBodiesSnapshot raw_bodies_snapshot{ .segment{ - .file_path = make_path(bundle.body_segment.path()), - .memory_address = bundle.body_segment.memory_file_region().data(), - .memory_length = bundle.body_segment.memory_file_region().size(), + .file_path = make_path(bundle.body_segment().path()), + .memory_address = bundle.body_segment().memory_file_region().data(), + .memory_length = bundle.body_segment().memory_file_region().size(), }, .block_num_index{ - .file_path = make_path(bundle.idx_body_number.path()), - .memory_address = bundle.idx_body_number.memory_file_region().data(), - .memory_length = bundle.idx_body_number.memory_file_region().size(), + .file_path = make_path(bundle.idx_body_number().path()), + .memory_address = bundle.idx_body_number().memory_file_region().data(), + .memory_length = bundle.idx_body_number().memory_file_region().size(), }, }; bodies_snapshot_sequence.push_back(raw_bodies_snapshot); @@ -186,19 +186,19 @@ std::vector collect_all_snapshots(const SnapshotRepositor { SilkwormTransactionsSnapshot raw_transactions_snapshot{ .segment{ - .file_path = make_path(bundle.txn_segment.path()), - .memory_address = bundle.txn_segment.memory_file_region().data(), - .memory_length = bundle.txn_segment.memory_file_region().size(), + .file_path = make_path(bundle.txn_segment().path()), + .memory_address = bundle.txn_segment().memory_file_region().data(), + .memory_length = bundle.txn_segment().memory_file_region().size(), }, .tx_hash_index{ - .file_path = make_path(bundle.idx_txn_hash.path()), - .memory_address = bundle.idx_txn_hash.memory_file_region().data(), - .memory_length = bundle.idx_txn_hash.memory_file_region().size(), + .file_path = make_path(bundle.idx_txn_hash().path()), + .memory_address = bundle.idx_txn_hash().memory_file_region().data(), + .memory_length = bundle.idx_txn_hash().memory_file_region().size(), }, .tx_hash_2_block_index{ - .file_path = make_path(bundle.idx_txn_hash_2_block.path()), - .memory_address = bundle.idx_txn_hash_2_block.memory_file_region().data(), - .memory_length = bundle.idx_txn_hash_2_block.memory_file_region().size(), + .file_path = make_path(bundle.idx_txn_hash_2_block().path()), + .memory_address = bundle.idx_txn_hash_2_block().memory_file_region().data(), + .memory_length = bundle.idx_txn_hash_2_block().memory_file_region().size(), }, }; transactions_snapshot_sequence.push_back(raw_transactions_snapshot); diff --git a/cmd/dev/snapshots.cpp b/cmd/dev/snapshots.cpp index 678ba11873..f0cfdb064e 100644 --- a/cmd/dev/snapshots.cpp +++ b/cmd/dev/snapshots.cpp @@ -328,8 +328,8 @@ BodyCounters count_bodies_in_all(const SnapshotSubcommandSettings& settings) { int num_bodies = 0; uint64_t num_txns = 0; for (const auto& bundle_ptr : repository.view_bundles()) { - const auto& bundle = *bundle_ptr; - const auto [body_count, txn_count] = count_bodies_in_one(settings, bundle.body_segment); + db::blocks::BundleDataRef bundle{**bundle_ptr}; + const auto [body_count, txn_count] = count_bodies_in_one(settings, bundle.body_segment()); num_bodies += body_count; num_txns += txn_count; } @@ -376,8 +376,8 @@ int count_headers_in_all(const SnapshotSubcommandSettings& settings) { auto repository = make_repository(settings.settings); int num_headers{0}; for (const auto& bundle_ptr : repository.view_bundles()) { - const auto& bundle = *bundle_ptr; - const auto header_count = count_headers_in_one(settings, bundle.header_segment); + db::blocks::BundleDataRef bundle{**bundle_ptr}; + const auto header_count = count_headers_in_one(settings, bundle.header_segment()); num_headers += header_count; } return num_headers; diff --git a/silkworm/capi/silkworm.cpp b/silkworm/capi/silkworm.cpp index 7d1372e25e..641dd18f80 100644 --- a/silkworm/capi/silkworm.cpp +++ b/silkworm/capi/silkworm.cpp @@ -378,19 +378,25 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn snapshots::Index idx_txn_hash{transactions_segment_path->related_path(snapshots::SnapshotType::transactions, snapshots::kIdxExtension), make_region(ts.tx_hash_index)}; snapshots::Index idx_txn_hash_2_block{transactions_segment_path->related_path(snapshots::SnapshotType::transactions_to_block, snapshots::kIdxExtension), make_region(ts.tx_hash_2_block_index)}; - snapshots::SnapshotBundle bundle{ - headers_segment_path->step_range(), - { - .header_segment = std::move(header_segment), - .idx_header_hash = std::move(idx_header_hash), + snapshots::SnapshotBundleData bundle_data = [&]() { + snapshots::SnapshotBundleData data; - .body_segment = std::move(body_segment), - .idx_body_number = std::move(idx_body_number), + data.segments.emplace(db::blocks::kHeaderSegmentName, std::move(header_segment)); + data.rec_split_indexes.emplace(db::blocks::kIdxHeaderHashName, std::move(idx_header_hash)); - .txn_segment = std::move(txn_segment), - .idx_txn_hash = std::move(idx_txn_hash), - .idx_txn_hash_2_block = std::move(idx_txn_hash_2_block), - }, + data.segments.emplace(db::blocks::kBodySegmentName, std::move(body_segment)); + data.rec_split_indexes.emplace(db::blocks::kIdxBodyNumberName, std::move(idx_body_number)); + + data.segments.emplace(db::blocks::kTxnSegmentName, std::move(txn_segment)); + data.rec_split_indexes.emplace(db::blocks::kIdxTxnHashName, std::move(idx_txn_hash)); + data.rec_split_indexes.emplace(db::blocks::kIdxTxnHash2BlockName, std::move(idx_txn_hash_2_block)); + + return data; + }(); + + snapshots::SnapshotBundle bundle{ + headers_segment_path->step_range(), + std::move(bundle_data), }; handle->repository->add_snapshot_bundle(std::move(bundle)); return SILKWORM_OK; diff --git a/silkworm/db/blocks/schema_config.hpp b/silkworm/db/blocks/schema_config.hpp index bcd485f9d7..2dd909b396 100644 --- a/silkworm/db/blocks/schema_config.hpp +++ b/silkworm/db/blocks/schema_config.hpp @@ -44,4 +44,18 @@ inline constexpr datastore::EntityName kIdxTxnHashName{"transactions"}; //! Index transaction_hash -> block_num inline constexpr datastore::EntityName kIdxTxnHash2BlockName{"transactions_to_block"}; +struct BundleDataRef { + const snapshots::SnapshotBundleData& data; + + const snapshots::SegmentFileReader& header_segment() const { return data.segments.at(kHeaderSegmentName); } + const snapshots::Index& idx_header_hash() const { return data.rec_split_indexes.at(kIdxHeaderHashName); } + + const snapshots::SegmentFileReader& body_segment() const { return data.segments.at(kBodySegmentName); } + const snapshots::Index& idx_body_number() const { return data.rec_split_indexes.at(kIdxBodyNumberName); } + + const snapshots::SegmentFileReader& txn_segment() const { return data.segments.at(kTxnSegmentName); } + const snapshots::Index& idx_txn_hash() const { return data.rec_split_indexes.at(kIdxTxnHashName); } + const snapshots::Index& idx_txn_hash_2_block() const { return data.rec_split_indexes.at(kIdxTxnHash2BlockName); } +}; + } // namespace silkworm::db::blocks diff --git a/silkworm/db/blocks/transactions/txn_queries.hpp b/silkworm/db/blocks/transactions/txn_queries.hpp index 21cb5e8352..428a6c21d9 100644 --- a/silkworm/db/blocks/transactions/txn_queries.hpp +++ b/silkworm/db/blocks/transactions/txn_queries.hpp @@ -22,6 +22,7 @@ #include #include +#include "../schema_config.hpp" #include "txn_segment.hpp" namespace silkworm::snapshots { @@ -59,10 +60,10 @@ class TransactionBlockNumByTxnHashRepoQuery { std::optional exec(const Hash& hash) { for (const TBundle& bundle_ptr : bundles_) { - const auto& bundle = *bundle_ptr; - const SegmentFileReader& segment = bundle.txn_segment; - const Index& idx_txn_hash = bundle.idx_txn_hash; - const Index& idx_txn_hash_2_block = bundle.idx_txn_hash_2_block; + db::blocks::BundleDataRef bundle{**bundle_ptr}; + const SegmentFileReader& segment = bundle.txn_segment(); + const Index& idx_txn_hash = bundle.idx_txn_hash(); + const Index& idx_txn_hash_2_block = bundle.idx_txn_hash_2_block(); TransactionFindByHashQuery cross_check_query{{segment, idx_txn_hash}}; TransactionBlockNumByTxnHashQuery query{idx_txn_hash_2_block, cross_check_query}; diff --git a/silkworm/db/datastore/snapshots/common/snapshot_path.cpp b/silkworm/db/datastore/snapshots/common/snapshot_path.cpp index 3e9f4e6c54..48199d3422 100644 --- a/silkworm/db/datastore/snapshots/common/snapshot_path.cpp +++ b/silkworm/db/datastore/snapshots/common/snapshot_path.cpp @@ -32,7 +32,7 @@ namespace silkworm::snapshots { namespace fs = std::filesystem; -std::optional SnapshotPath::parse(fs::path path) { +std::optional SnapshotPath::parse_step_range(fs::path path) { const std::string filename_no_ext = path.stem().string(); // Expected stem format: -<6_digit_block_from>-<6_digit_block_to>- @@ -43,17 +43,6 @@ std::optional SnapshotPath::parse(fs::path path) { const auto [ver, from, to, tag] = std::tie(tokens[0], tokens[1], tokens[2], tokens[3]); - // Expected version format: v (hence check length, check first char and parse w/ offset by one) - if (ver.empty() || ver[0] != 'v') { - return std::nullopt; - } - - uint8_t ver_num = 0; - const auto ver_result = std::from_chars(ver.data() + 1, ver.data() + ver.size(), ver_num); - if (ver_result.ec == std::errc::invalid_argument) { - return std::nullopt; - } - // Expected scaled block format: if (from.size() != 6 || to.size() != 6) { return std::nullopt; @@ -76,6 +65,36 @@ std::optional SnapshotPath::parse(fs::path path) { return std::nullopt; } + return StepRange{step_from, step_to}; +} + +std::optional SnapshotPath::parse(fs::path path) { + const std::string filename_no_ext = path.stem().string(); + + // Expected stem format: -<6_digit_block_from>-<6_digit_block_to>- + const std::vector tokens = absl::StrSplit(filename_no_ext, absl::MaxSplits('-', 3)); + if (tokens.size() != 4) { + return std::nullopt; + } + + const auto [ver, from, to, tag] = std::tie(tokens[0], tokens[1], tokens[2], tokens[3]); + + // Expected version format: v (hence check length, check first char and parse w/ offset by one) + if (ver.empty() || ver[0] != 'v') { + return std::nullopt; + } + + uint8_t ver_num = 0; + const auto ver_result = std::from_chars(ver.data() + 1, ver.data() + ver.size(), ver_num); + if (ver_result.ec == std::errc::invalid_argument) { + return std::nullopt; + } + + auto step_range = parse_step_range(path); + if (!step_range) { + return std::nullopt; + } + // Expected tag format: headers|bodies|transactions|transactions-to-block // parsing relies on magic_enum, so SnapshotType items must match exactly std::string tag_str{tag.data(), tag.size()}; @@ -85,7 +104,7 @@ std::optional SnapshotPath::parse(fs::path path) { return std::nullopt; } - return SnapshotPath{std::move(path), ver_num, {step_from, step_to}, *type}; + return SnapshotPath{std::move(path), ver_num, *step_range, *type}; } SnapshotPath SnapshotPath::make( diff --git a/silkworm/db/datastore/snapshots/common/snapshot_path.hpp b/silkworm/db/datastore/snapshots/common/snapshot_path.hpp index bf10ce7053..c0b8948786 100644 --- a/silkworm/db/datastore/snapshots/common/snapshot_path.hpp +++ b/silkworm/db/datastore/snapshots/common/snapshot_path.hpp @@ -36,6 +36,7 @@ inline constexpr uint8_t kSnapshotV1{1}; class SnapshotPath { public: static std::optional parse(std::filesystem::path path); + static std::optional parse_step_range(std::filesystem::path path); static SnapshotPath make( const std::filesystem::path& dir, diff --git a/silkworm/db/datastore/snapshots/snapshot_bundle.cpp b/silkworm/db/datastore/snapshots/snapshot_bundle.cpp index 2347a53910..e50c001ef9 100644 --- a/silkworm/db/datastore/snapshots/snapshot_bundle.cpp +++ b/silkworm/db/datastore/snapshots/snapshot_bundle.cpp @@ -16,57 +16,79 @@ #include "snapshot_bundle.hpp" +#include + #include namespace silkworm::snapshots { +SnapshotBundleData make_bundle_data( + const Schema::RepositoryDef& schema, + const std::filesystem::path& dir_path, + StepRange step_range) { + return { + schema.make_segments(dir_path, step_range), + schema.make_rec_split_indexes(dir_path, step_range), + }; +} + SnapshotBundle::~SnapshotBundle() { close(); } void SnapshotBundle::reopen() { - for (auto& segment_ref : segments()) { - segment_ref.get().reopen_segment(); - ensure(!segment_ref.get().empty(), [&]() { - return "invalid empty snapshot " + segment_ref.get().fs_path().string(); + for (auto& entry : data_.segments) { + SegmentFileReader& segment = entry.second; + segment.reopen_segment(); + ensure(!segment.empty(), [&]() { + return "invalid empty snapshot " + segment.fs_path().string(); }); } - for (auto& index_ref : indexes()) { - index_ref.get().reopen_index(); + for (auto& entry : data_.rec_split_indexes) { + Index& index = entry.second; + index.reopen_index(); } } void SnapshotBundle::close() { - for (auto& index_ref : indexes()) { - index_ref.get().close_index(); + for (auto& entry : data_.rec_split_indexes) { + Index& index = entry.second; + index.close_index(); } - for (auto& segment_ref : segments()) { - segment_ref.get().close(); + for (auto& entry : data_.segments) { + SegmentFileReader& segment = entry.second; + segment.close(); } if (on_close_callback_) { on_close_callback_(*this); } } +const SegmentFileReader& SnapshotBundle::segment(SnapshotType type) const { + datastore::EntityName name{magic_enum::enum_name(type)}; + return data_.segments.at(name); +} + +const Index& SnapshotBundle::index(SnapshotType type) const { + datastore::EntityName name{magic_enum::enum_name(type)}; + return data_.rec_split_indexes.at(name); +} + std::vector SnapshotBundle::files() { std::vector files; - files.reserve(kSnapshotsCount + kIndexesCount); - - for (auto& segment_ref : segments()) { - files.push_back(segment_ref.get().path().path()); + for (const SegmentFileReader& segment : segments()) { + files.push_back(segment.path().path()); } - for (auto& index_ref : indexes()) { - files.push_back(index_ref.get().path().path()); + for (const Index& index : rec_split_indexes()) { + files.push_back(index.path().path()); } return files; } std::vector SnapshotBundle::segment_paths() { std::vector paths; - paths.reserve(kSnapshotsCount); - - for (auto& segment_ref : segments()) { - paths.push_back(segment_ref.get().path()); + for (const SegmentFileReader& segment : segments()) { + paths.push_back(segment.path()); } return paths; } diff --git a/silkworm/db/datastore/snapshots/snapshot_bundle.hpp b/silkworm/db/datastore/snapshots/snapshot_bundle.hpp index 7d60c1bc19..702ed81a17 100644 --- a/silkworm/db/datastore/snapshots/snapshot_bundle.hpp +++ b/silkworm/db/datastore/snapshots/snapshot_bundle.hpp @@ -33,24 +33,15 @@ namespace silkworm::snapshots { struct SnapshotBundleData { - SegmentFileReader header_segment; - //! Index header_hash -> block_num -> headers_segment_offset - Index idx_header_hash; - - SegmentFileReader body_segment; - //! Index block_num -> bodies_segment_offset - Index idx_body_number; - - SegmentFileReader txn_segment; - //! Index transaction_hash -> txn_id -> transactions_segment_offset - Index idx_txn_hash; - //! Index transaction_hash -> block_num - Index idx_txn_hash_2_block; - - static constexpr size_t kSnapshotsCount = 3; - static constexpr size_t kIndexesCount = 4; + std::map segments; + std::map rec_split_indexes; }; +SnapshotBundleData make_bundle_data( + const Schema::RepositoryDef& schema, + const std::filesystem::path& dir_path, + StepRange step_range); + struct SnapshotBundlePaths { SnapshotBundlePaths(Schema::RepositoryDef schema, std::filesystem::path dir_path, StepRange step_range) : schema_{std::move(schema)}, @@ -68,10 +59,10 @@ struct SnapshotBundlePaths { StepRange step_range_; }; -struct SnapshotBundle : public SnapshotBundleData { - explicit SnapshotBundle(StepRange step_range, SnapshotBundleData bundle) - : SnapshotBundleData{std::move(bundle)}, - step_range_{step_range} { +struct SnapshotBundle { + SnapshotBundle(StepRange step_range, SnapshotBundleData data) + : step_range_{step_range}, + data_{std::move(data)} { reopen(); } virtual ~SnapshotBundle(); @@ -79,69 +70,14 @@ struct SnapshotBundle : public SnapshotBundleData { SnapshotBundle(SnapshotBundle&&) = default; SnapshotBundle& operator=(SnapshotBundle&&) noexcept = default; - std::array, kSnapshotsCount> segments() { - return { - header_segment, - body_segment, - txn_segment, - }; - } - - std::array, kIndexesCount> indexes() { - return { - idx_header_hash, - idx_body_number, - idx_txn_hash, - idx_txn_hash_2_block, - }; - } - - std::array snapshot_types() { - return { - SnapshotType::headers, - SnapshotType::bodies, - SnapshotType::transactions, - }; - } - - std::array index_types() { - return { - SnapshotType::headers, - SnapshotType::bodies, - SnapshotType::transactions, - SnapshotType::transactions_to_block, - }; - } - - const SegmentFileReader& segment(SnapshotType type) const { - switch (type) { - case headers: - return header_segment; - case bodies: - return body_segment; - case transactions: - case transactions_to_block: - return txn_segment; - } - SILKWORM_ASSERT(false); - return header_segment; + auto segments() { + return make_map_values_view(data_.segments); } - - const Index& index(SnapshotType type) const { - switch (type) { - case headers: - return idx_header_hash; - case bodies: - return idx_body_number; - case transactions: - return idx_txn_hash; - case transactions_to_block: - return idx_txn_hash_2_block; - } - SILKWORM_ASSERT(false); - return idx_header_hash; + auto rec_split_indexes() { + return make_map_values_view(data_.rec_split_indexes); } - + const SegmentFileReader& segment(SnapshotType type) const; + const Index& index(SnapshotType type) const; SegmentAndIndex segment_and_index(SnapshotType type) const { return {segment(type), index(type)}; } @@ -158,8 +94,12 @@ struct SnapshotBundle : public SnapshotBundleData { on_close_callback_ = std::move(callback); } + const SnapshotBundleData& operator*() const { return data_; } + const SnapshotBundleData* operator->() const { return &data_; } + private: StepRange step_range_; + SnapshotBundleData data_; std::function on_close_callback_; }; diff --git a/silkworm/db/datastore/snapshots/snapshot_bundle_factory.hpp b/silkworm/db/datastore/snapshots/snapshot_bundle_factory.hpp index b888b4984e..0195de4b58 100644 --- a/silkworm/db/datastore/snapshots/snapshot_bundle_factory.hpp +++ b/silkworm/db/datastore/snapshots/snapshot_bundle_factory.hpp @@ -30,8 +30,6 @@ namespace silkworm::snapshots { struct SnapshotBundleFactory { virtual ~SnapshotBundleFactory() = default; - using PathByTypeProvider = std::function; - virtual SnapshotBundle make(PathByTypeProvider snapshot_path, PathByTypeProvider index_path) const = 0; virtual SnapshotBundle make(const std::filesystem::path& dir_path, StepRange range) const = 0; virtual SnapshotBundlePaths make_paths(const std::filesystem::path& dir_path, StepRange range) const = 0; diff --git a/silkworm/db/datastore/snapshots/snapshot_repository.cpp b/silkworm/db/datastore/snapshots/snapshot_repository.cpp index 3f73dd7ed8..8c0419d848 100644 --- a/silkworm/db/datastore/snapshots/snapshot_repository.cpp +++ b/silkworm/db/datastore/snapshots/snapshot_repository.cpp @@ -113,62 +113,42 @@ std::vector> SnapshotRepository::missing_indexes() void SnapshotRepository::reopen_folder() { SILK_INFO << "Reopen snapshot repository folder: " << dir_path_.string(); - SnapshotPathList all_snapshot_paths = get_segment_files(); - SnapshotPathList all_index_paths = get_idx_files(); - std::map>> groups; + auto file_ranges = list_dir_file_ranges(); + if (file_ranges.empty()) return; - for (size_t i = 0; i < all_snapshot_paths.size(); ++i) { - auto& path = all_snapshot_paths[i]; - auto& group = groups[path.step_range().start][false]; - group[path.type()] = i; - } - - for (size_t i = 0; i < all_index_paths.size(); ++i) { - auto& path = all_index_paths[i]; - auto& group = groups[path.step_range().start][true]; - group[path.type()] = i; - } - - Step num{0}; - if (!groups.empty()) { - num = groups.begin()->first; - } + // sort file_ranges by range.start + std::sort(file_ranges.begin(), file_ranges.end(), [](const StepRange& r1, const StepRange& r2) -> bool { + return r1.start < r2.start; + }); std::unique_lock lock(*bundles_mutex_); // copy bundles prior to modification auto bundles = std::make_shared(*bundles_); - while (groups.contains(num) && - (groups[num][false].size() == SnapshotBundle::kSnapshotsCount) && - (groups[num][true].size() == SnapshotBundle::kIndexesCount)) { + Step num = file_ranges[0].start; + for (const auto& range : file_ranges) { + // avoid gaps/overlaps + if (range.start != num) continue; + if (range.size() == 0) continue; + if (!bundles->contains(num)) { - auto snapshot_path = [&](SnapshotType type) { - return all_snapshot_paths[groups[num][false][type]]; - }; - auto index_path = [&](SnapshotType type) { - return all_index_paths[groups[num][true][type]]; - }; - SnapshotBundle bundle = bundle_factory_->make(snapshot_path, index_path); - - bundles->insert_or_assign(num, std::make_shared(std::move(bundle))); + SnapshotBundlePaths bundle_paths = bundle_factory_->make_paths(dir_path_, range); + // if all bundle paths exist + if (std::ranges::all_of(bundle_paths.files(), [](const fs::path& p) { return fs::exists(p); })) { + SnapshotBundle bundle = bundle_factory_->make(dir_path_, range); + bundles->insert_or_assign(num, std::make_shared(std::move(bundle))); + } } - auto& bundle = *bundles->at(num); - - if (num < bundle.step_range().end) { - num = bundle.step_range().end; - } else { - break; - } + // avoid gaps/overlaps + num = range.end; } bundles_ = bundles; lock.unlock(); SILK_INFO << "Total reopened bundles: " << bundles_count() - << " segments: " << total_segments_count() - << " indexes: " << total_indexes_count() << " max block available: " << max_block_available(); } @@ -221,6 +201,26 @@ SnapshotPathList SnapshotRepository::get_files(const std::string& ext) const { return snapshot_files; } +std::vector SnapshotRepository::list_dir_file_ranges() const { + ensure(fs::exists(dir_path_), + [&]() { return "SnapshotRepository: " + dir_path_.string() + " does not exist"; }); + ensure(fs::is_directory(dir_path_), + [&]() { return "SnapshotRepository: " + dir_path_.string() + " is a not folder"; }); + + std::vector results; + for (const auto& file : fs::directory_iterator{dir_path_}) { + if (!fs::is_regular_file(file.path())) { + continue; + } + const auto range = SnapshotPath::parse_step_range(file.path()); + if (range) { + results.push_back(*range); + } + } + + return results; +} + bool is_stale_index_path(const SnapshotPath& index_path) { SnapshotType snapshot_type = (index_path.type() == SnapshotType::transactions_to_block) ? SnapshotType::transactions diff --git a/silkworm/db/datastore/snapshots/snapshot_repository.hpp b/silkworm/db/datastore/snapshots/snapshot_repository.hpp index 8e3c12fd2e..ede6698f13 100644 --- a/silkworm/db/datastore/snapshots/snapshot_repository.hpp +++ b/silkworm/db/datastore/snapshots/snapshot_repository.hpp @@ -66,8 +66,6 @@ class SnapshotRepository { void replace_snapshot_bundles(SnapshotBundle bundle); size_t bundles_count() const; - size_t total_segments_count() const { return bundles_count() * SnapshotBundle::kSnapshotsCount; } - size_t total_indexes_count() const { return bundles_count() * SnapshotBundle::kIndexesCount; } //! All types of .seg and .idx files are available up to this block number BlockNum max_block_available() const; @@ -123,6 +121,7 @@ class SnapshotRepository { } SnapshotPathList get_files(const std::string& ext) const; + std::vector list_dir_file_ranges() const; SnapshotPathList stale_index_paths() const; diff --git a/silkworm/db/snapshot_bundle_factory_impl.cpp b/silkworm/db/snapshot_bundle_factory_impl.cpp index 64a8d355c0..ca6aa0674c 100644 --- a/silkworm/db/snapshot_bundle_factory_impl.cpp +++ b/silkworm/db/snapshot_bundle_factory_impl.cpp @@ -27,31 +27,11 @@ namespace silkworm::db { using namespace snapshots; -SnapshotBundle SnapshotBundleFactoryImpl::make(PathByTypeProvider snapshot_path, PathByTypeProvider index_path) const { - return SnapshotBundle{ - snapshot_path(SnapshotType::headers).step_range(), - { - .header_segment = SegmentFileReader(snapshot_path(SnapshotType::headers)), - .idx_header_hash = Index(index_path(SnapshotType::headers)), - - .body_segment = SegmentFileReader(snapshot_path(SnapshotType::bodies)), - .idx_body_number = Index(index_path(SnapshotType::bodies)), - - .txn_segment = SegmentFileReader(snapshot_path(SnapshotType::transactions)), - .idx_txn_hash = Index(index_path(SnapshotType::transactions)), - .idx_txn_hash_2_block = Index(index_path(SnapshotType::transactions_to_block)), - }, - }; -} - SnapshotBundle SnapshotBundleFactoryImpl::make(const std::filesystem::path& dir_path, snapshots::StepRange range) const { - PathByTypeProvider snapshot_path = [&](silkworm::snapshots::SnapshotType type) { - return SnapshotPath::make(dir_path, kSnapshotV1, range, type); - }; - PathByTypeProvider index_path = [&](silkworm::snapshots::SnapshotType type) { - return SnapshotPath::make(dir_path, kSnapshotV1, range, type, kIdxExtension); + return SnapshotBundle{ + range, + make_bundle_data(schema_, dir_path, range), }; - return make(std::move(snapshot_path), std::move(index_path)); } SnapshotBundlePaths SnapshotBundleFactoryImpl::make_paths(const std::filesystem::path& dir_path, snapshots::StepRange range) const { diff --git a/silkworm/db/snapshot_bundle_factory_impl.hpp b/silkworm/db/snapshot_bundle_factory_impl.hpp index fe99a496f8..0af8dc5895 100644 --- a/silkworm/db/snapshot_bundle_factory_impl.hpp +++ b/silkworm/db/snapshot_bundle_factory_impl.hpp @@ -26,7 +26,6 @@ struct SnapshotBundleFactoryImpl : public snapshots::SnapshotBundleFactory { : schema_{std::move(schema)} {} ~SnapshotBundleFactoryImpl() override = default; - snapshots::SnapshotBundle make(PathByTypeProvider snapshot_path, PathByTypeProvider index_path) const override; snapshots::SnapshotBundle make(const std::filesystem::path& dir_path, snapshots::StepRange range) const override; snapshots::SnapshotBundlePaths make_paths(const std::filesystem::path& dir_path, snapshots::StepRange range) const override; std::vector> index_builders(const snapshots::SnapshotPath& segment_path) const override; diff --git a/silkworm/db/snapshot_sync.cpp b/silkworm/db/snapshot_sync.cpp index c030a58e6d..2159173bee 100644 --- a/silkworm/db/snapshot_sync.cpp +++ b/silkworm/db/snapshot_sync.cpp @@ -279,9 +279,9 @@ void SnapshotSync::seed_frozen_local_snapshots() { auto& bundle = *bundle_ptr; auto block_range = bundle.step_range().to_block_num_range(); bool is_frozen = block_range.size() >= kMaxMergerSnapshotSize; - const auto first_snapshot = bundle.segments()[0]; + const SegmentFileReader& first_snapshot = *bundle.segments().begin(); // assume that if one snapshot in the bundle is preverified, then all of them are - bool is_preverified = snapshots_config_.contains_file_name(first_snapshot.get().path().filename()); + bool is_preverified = snapshots_config_.contains_file_name(first_snapshot.path().filename()); if (is_frozen && !is_preverified) { seed_bundle(bundle); } @@ -336,8 +336,8 @@ void SnapshotSync::update_block_headers(RWTxn& txn, BlockNum max_block_available uint64_t block_count{0}; for (const auto& bundle_ptr : repository_.view_bundles()) { - const auto& bundle = *bundle_ptr; - for (const BlockHeader& header : HeaderSegmentReader{bundle.header_segment}) { + db::blocks::BundleDataRef bundle{**bundle_ptr}; + for (const BlockHeader& header : HeaderSegmentReader{bundle.header_segment()}) { SILK_TRACE << "SnapshotSync: header number=" << header.number << " hash=" << Hash{header.hash()}.to_hex(); const auto block_number = header.number; if (block_number > max_block_available) continue; diff --git a/silkworm/db/snapshot_sync_test.cpp b/silkworm/db/snapshot_sync_test.cpp index 673948ccd1..a482de524f 100644 --- a/silkworm/db/snapshot_sync_test.cpp +++ b/silkworm/db/snapshot_sync_test.cpp @@ -115,44 +115,28 @@ TEST_CASE("SnapshotSync::update_block_headers", "[db][snapshot][sync]") { // Create a sample Header snapshot+index snapshots::test_util::SampleHeaderSnapshotFile header_segment_file{tmp_dir_path}; auto& header_segment_path = header_segment_file.path(); - SegmentFileReader header_segment{header_segment_path}; auto header_index_builder = HeaderIndex::make(header_segment_path); header_index_builder.set_base_data_id(header_segment_file.block_num_range().start); REQUIRE_NOTHROW(header_index_builder.build()); - Index idx_header_hash{header_segment_path.index_file()}; // Create a sample Body snapshot+index snapshots::test_util::SampleBodySnapshotFile body_segment_file{tmp_dir_path}; auto& body_segment_path = body_segment_file.path(); - SegmentFileReader body_segment{body_segment_path}; auto body_index_builder = BodyIndex::make(body_segment_path); body_index_builder.set_base_data_id(body_segment_file.block_num_range().start); REQUIRE_NOTHROW(body_index_builder.build()); - Index idx_body_number{body_segment_path.index_file()}; // Create a sample Transaction snapshot+indexes snapshots::test_util::SampleTransactionSnapshotFile txn_segment_file{tmp_dir_path}; auto& txn_segment_path = txn_segment_file.path(); - SegmentFileReader txn_segment{txn_segment_path}; REQUIRE_NOTHROW(TransactionIndex::make(body_segment_path, txn_segment_path).build()); REQUIRE_NOTHROW(TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start).build()); - Index idx_txn_hash{txn_segment_path.related_path(SnapshotType::transactions, kIdxExtension)}; - Index idx_txn_hash_2_block{txn_segment_path.related_path(SnapshotType::transactions_to_block, kIdxExtension)}; // Add a sample Snapshot bundle to the repository + auto step_range = StepRange::from_block_num_range(snapshots::test_util::kSampleSnapshotBlockRange); SnapshotBundle bundle{ - header_segment_path.step_range(), - { - .header_segment = std::move(header_segment), - .idx_header_hash = std::move(idx_header_hash), - - .body_segment = std::move(body_segment), - .idx_body_number = std::move(idx_body_number), - - .txn_segment = std::move(txn_segment), - .idx_txn_hash = std::move(idx_txn_hash), - .idx_txn_hash_2_block = std::move(idx_txn_hash_2_block), - }, + step_range, + make_bundle_data(blocks::make_blocks_repository_schema(), tmp_dir_path, step_range), }; auto& repository = snapshot_sync.repository(); repository.add_snapshot_bundle(std::move(bundle));