Skip to content

Commit

Permalink
v2.1: Marks old storages as dirty and uncleaned in clean_accounts() (…
Browse files Browse the repository at this point in the history
…backport of #3737) (#3748)

* Marks old storages as dirty and uncleaned in clean_accounts() (#3737)

(cherry picked from commit 31742ca)

# Conflicts:
#	accounts-db/src/accounts_db/tests.rs

* fixes merge conflict

---------

Co-authored-by: Brooks <brooks@anza.xyz>
  • Loading branch information
mergify[bot] and brooksprumo authored Nov 25, 2024
1 parent 46a2595 commit 970606e
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 59 deletions.
166 changes: 116 additions & 50 deletions accounts-db/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2457,11 +2457,30 @@ impl AccountsDb {
is_startup: bool,
timings: &mut CleanKeyTimings,
epoch_schedule: &EpochSchedule,
old_storages_policy: OldStoragesPolicy,
) -> CleaningCandidates {
let oldest_non_ancient_slot = self.get_oldest_non_ancient_slot(epoch_schedule);
let mut dirty_store_processing_time = Measure::start("dirty_store_processing");
let max_slot_inclusive =
max_clean_root_inclusive.unwrap_or_else(|| self.accounts_index.max_root_inclusive());
let max_root_inclusive = self.accounts_index.max_root_inclusive();
let max_slot_inclusive = max_clean_root_inclusive.unwrap_or(max_root_inclusive);

if old_storages_policy == OldStoragesPolicy::Clean {
let slot_one_epoch_old =
max_root_inclusive.saturating_sub(epoch_schedule.slots_per_epoch);
// do nothing special for these 100 old storages that will likely get cleaned up shortly
let acceptable_straggler_slot_count = 100;
let old_slot_cutoff =
slot_one_epoch_old.saturating_sub(acceptable_straggler_slot_count);
let (old_storages, old_slots) = self.get_snapshot_storages(..old_slot_cutoff);
let num_old_storages = old_storages.len();
self.accounts_index
.add_uncleaned_roots(old_slots.iter().copied());
for (old_slot, old_storage) in std::iter::zip(old_slots, old_storages) {
self.dirty_stores.entry(old_slot).or_insert(old_storage);
}
info!("Marked {num_old_storages} old storages as dirty");
}

let mut dirty_stores = Vec::with_capacity(self.dirty_stores.len());
// find the oldest dirty slot
// we'll add logging if that append vec cannot be marked dead
Expand Down Expand Up @@ -2573,7 +2592,16 @@ impl AccountsDb {

/// Call clean_accounts() with the common parameters that tests/benches use.
///
/// Tests do not pick a policy themselves: old storages are cleaned only when
/// ancient append vecs are disabled (`ancient_append_vec_offset` is None),
/// mirroring the validator's runtime policy selection.
pub fn clean_accounts_for_tests(&self) {
    self.clean_accounts(
        None,
        false,
        &EpochSchedule::default(),
        if self.ancient_append_vec_offset.is_some() {
            OldStoragesPolicy::Leave
        } else {
            OldStoragesPolicy::Clean
        },
    )
}

/// called with cli argument to verify refcounts are correct on all accounts
Expand Down Expand Up @@ -2680,6 +2708,7 @@ impl AccountsDb {
max_clean_root_inclusive: Option<Slot>,
is_startup: bool,
epoch_schedule: &EpochSchedule,
old_storages_policy: OldStoragesPolicy,
) {
if self.exhaustively_verify_refcounts {
self.exhaustively_verify_refcounts(max_clean_root_inclusive);
Expand All @@ -2701,6 +2730,7 @@ impl AccountsDb {
is_startup,
&mut key_timings,
epoch_schedule,
old_storages_policy,
);

let num_candidates = Self::count_pubkeys(&candidates);
Expand Down Expand Up @@ -4561,7 +4591,15 @@ impl AccountsDb {
let maybe_clean = || {
if self.dirty_stores.len() > DIRTY_STORES_CLEANING_THRESHOLD {
let latest_full_snapshot_slot = self.latest_full_snapshot_slot();
self.clean_accounts(latest_full_snapshot_slot, is_startup, epoch_schedule);
self.clean_accounts(
latest_full_snapshot_slot,
is_startup,
epoch_schedule,
// Leave any old storages alone for now. Once the validator is running
// normally, calls to clean_accounts() will have the correct policy based
// on if ancient storages are enabled or not.
OldStoragesPolicy::Leave,
);
}
};

Expand Down Expand Up @@ -6738,40 +6776,6 @@ impl AccountsDb {
true
}

/// storages are sorted by slot and have range info.
/// add all stores older than slots_per_epoch to dirty_stores so clean visits these slots
fn mark_old_slots_as_dirty(
    &self,
    storages: &SortedStorages,
    slots_per_epoch: Slot,
    stats: &mut crate::accounts_hash::HashStats,
) {
    // Nothing to do if ancient append vecs are enabled.
    // Ancient slots will be visited by the ancient append vec code and dealt with correctly.
    // we expect these ancient append vecs to be old and keeping accounts
    // We can expect the normal processes will keep them cleaned.
    // If we included them here then ALL accounts in ALL ancient append vecs will be visited by clean each time.
    if self.ancient_append_vec_offset.is_some() {
        return;
    }

    let mut mark_time = Measure::start("mark_time");
    // do nothing special for these old stores which will likely get cleaned up shortly
    let acceptable_straggler_slot_count = 100;
    let in_epoch_range_start = storages
        .max_slot_inclusive()
        .saturating_sub(slots_per_epoch + acceptable_straggler_slot_count);
    // every storage strictly older than the cutoff becomes dirty so clean will visit it
    let mut num_dirty_slots: usize = 0;
    for (slot, storage) in storages.iter_range(&(..in_epoch_range_start)) {
        let Some(storage) = storage else {
            continue;
        };
        self.dirty_stores.insert(slot, storage.clone());
        num_dirty_slots += 1;
    }
    mark_time.stop();
    stats.mark_time_us = mark_time.as_us();
    stats.num_dirty_slots = num_dirty_slots;
}

pub fn calculate_accounts_hash_from(
&self,
data_source: CalcAccountsHashDataSource,
Expand Down Expand Up @@ -7112,8 +7116,6 @@ impl AccountsDb {
let storages_start_slot = storages.range().start;
stats.oldest_root = storages_start_slot;

self.mark_old_slots_as_dirty(storages, config.epoch_schedule.slots_per_epoch, &mut stats);

let slot = storages.max_slot_inclusive();
let use_bg_thread_pool = config.use_bg_thread_pool;
let accounts_hash_cache_path = self.accounts_hash_cache_path.clone();
Expand Down Expand Up @@ -9080,6 +9082,20 @@ pub(crate) enum UpdateIndexThreadSelection {
PoolWithThreshold,
}

/// How should old storages be handled in clean_accounts()?
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum OldStoragesPolicy {
/// Clean all old storages, even if they were not explicitly marked as dirty.
///
/// This is the default behavior when not skipping rewrites.
Clean,
/// Leave all old storages.
///
/// When skipping rewrites, we intentionally will have ancient storages.
/// Do not clean them up automatically in clean_accounts().
Leave,
}

// These functions/fields are only usable from a dev context (i.e. tests and benches)
#[cfg(feature = "dev-context-only-utils")]
impl AccountStorageEntry {
Expand Down Expand Up @@ -11304,13 +11320,23 @@ pub mod tests {
// updates in later slots in slot 1
assert_eq!(accounts.alive_account_count_in_slot(0), 1);
assert_eq!(accounts.alive_account_count_in_slot(1), 1);
accounts.clean_accounts(Some(0), false, &EpochSchedule::default());
accounts.clean_accounts(
Some(0),
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);
assert_eq!(accounts.alive_account_count_in_slot(0), 1);
assert_eq!(accounts.alive_account_count_in_slot(1), 1);
assert!(accounts.accounts_index.contains_with(&pubkey, None, None));

// Now the account can be cleaned up
accounts.clean_accounts(Some(1), false, &EpochSchedule::default());
accounts.clean_accounts(
Some(1),
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);
assert_eq!(accounts.alive_account_count_in_slot(0), 0);
assert_eq!(accounts.alive_account_count_in_slot(1), 0);

Expand Down Expand Up @@ -12842,7 +12868,12 @@ pub mod tests {
db.add_root_and_flush_write_cache(1);

// Only clean zero lamport accounts up to slot 0
db.clean_accounts(Some(0), false, &EpochSchedule::default());
db.clean_accounts(
Some(0),
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);

// Should still be able to find zero lamport account in slot 1
assert_eq!(
Expand Down Expand Up @@ -13996,7 +14027,12 @@ pub mod tests {
db.calculate_accounts_delta_hash(1);

// Clean to remove outdated entry from slot 0
db.clean_accounts(Some(1), false, &EpochSchedule::default());
db.clean_accounts(
Some(1),
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);

// Shrink Slot 0
{
Expand All @@ -14015,7 +14051,12 @@ pub mod tests {
// Should be one store before clean for slot 0
db.get_and_assert_single_storage(0);
db.calculate_accounts_delta_hash(2);
db.clean_accounts(Some(2), false, &EpochSchedule::default());
db.clean_accounts(
Some(2),
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);

// No stores should exist for slot 0 after clean
assert_no_storages_at_slot(&db, 0);
Expand Down Expand Up @@ -14862,15 +14903,30 @@ pub mod tests {
assert_eq!(accounts_db.ref_count_for_pubkey(&pubkey), 3);

accounts_db.set_latest_full_snapshot_slot(slot2);
accounts_db.clean_accounts(Some(slot2), false, &EpochSchedule::default());
accounts_db.clean_accounts(
Some(slot2),
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);
assert_eq!(accounts_db.ref_count_for_pubkey(&pubkey), 2);

accounts_db.set_latest_full_snapshot_slot(slot2);
accounts_db.clean_accounts(None, false, &EpochSchedule::default());
accounts_db.clean_accounts(
None,
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);
assert_eq!(accounts_db.ref_count_for_pubkey(&pubkey), 1);

accounts_db.set_latest_full_snapshot_slot(slot3);
accounts_db.clean_accounts(None, false, &EpochSchedule::default());
accounts_db.clean_accounts(
None,
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);
assert_eq!(accounts_db.ref_count_for_pubkey(&pubkey), 0);
}
);
Expand Down Expand Up @@ -17189,7 +17245,12 @@ pub mod tests {

// calculate the full accounts hash
let full_accounts_hash = {
accounts_db.clean_accounts(Some(slot - 1), false, &EpochSchedule::default());
accounts_db.clean_accounts(
Some(slot - 1),
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);
let (storages, _) = accounts_db.get_snapshot_storages(..=slot);
let storages = SortedStorages::new(&storages);
accounts_db.calculate_accounts_hash(
Expand Down Expand Up @@ -17255,7 +17316,12 @@ pub mod tests {
// calculate the incremental accounts hash
let incremental_accounts_hash = {
accounts_db.set_latest_full_snapshot_slot(full_accounts_hash_slot);
accounts_db.clean_accounts(Some(slot - 1), false, &EpochSchedule::default());
accounts_db.clean_accounts(
Some(slot - 1),
false,
&EpochSchedule::default(),
OldStoragesPolicy::Leave,
);
let (storages, _) =
accounts_db.get_snapshot_storages(full_accounts_hash_slot + 1..=slot);
let storages = SortedStorages::new(&storages);
Expand Down
35 changes: 26 additions & 9 deletions runtime/src/bank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ use {
accounts::{AccountAddressFilter, Accounts, PubkeyAccountSlot},
accounts_db::{
AccountStorageEntry, AccountsDb, AccountsDbConfig, CalcAccountsHashDataSource,
DuplicatesLtHash, PubkeyHashAccount, VerifyAccountsHashAndLamportsConfig,
DuplicatesLtHash, OldStoragesPolicy, PubkeyHashAccount,
VerifyAccountsHashAndLamportsConfig,
},
accounts_hash::{
AccountHash, AccountsHash, AccountsLtHash, CalcAccountsHashConfig, HashStats,
Expand Down Expand Up @@ -6114,14 +6115,15 @@ impl Bank {
let should_clean = force_clean || (!skip_shrink && self.slot() > 0);
if should_clean {
info!("Cleaning...");
// We cannot clean past the last full snapshot's slot because we are about to
// We cannot clean past the latest full snapshot's slot because we are about to
// perform an accounts hash calculation *up to that slot*. If we cleaned *past*
// that slot, then accounts could be removed from older storages, which would
// change the accounts hash.
self.rc.accounts.accounts_db.clean_accounts(
Some(latest_full_snapshot_slot),
true,
self.epoch_schedule(),
self.clean_accounts_old_storages_policy(),
);
info!("Cleaning... Done.");
} else {
Expand Down Expand Up @@ -6457,6 +6459,7 @@ impl Bank {
Some(highest_slot_to_clean),
false,
self.epoch_schedule(),
self.clean_accounts_old_storages_policy(),
);
}

Expand All @@ -6472,23 +6475,37 @@ impl Bank {
}

pub(crate) fn shrink_ancient_slots(&self) {
let can_skip_rewrites = self.bank_hash_skips_rent_rewrites();
let test_skip_rewrites_but_include_in_bank_hash = self
.rc
.accounts
.accounts_db
.test_skip_rewrites_but_include_in_bank_hash;
// Invoke ancient slot shrinking only when the validator is
// explicitly configured to do so. This condition may be
// removed when the skip rewrites feature is enabled.
if can_skip_rewrites || test_skip_rewrites_but_include_in_bank_hash {
if self.are_ancient_storages_enabled() {
self.rc
.accounts
.accounts_db
.shrink_ancient_slots(self.epoch_schedule())
}
}

/// Returns if ancient storages are enabled or not
///
/// Ancient storages are in use when either rent rewrites are skipped in the
/// bank hash, or the test-only skip-rewrites flag is set on the accounts db.
pub fn are_ancient_storages_enabled(&self) -> bool {
    let accounts_db = &self.rc.accounts.accounts_db;
    self.bank_hash_skips_rent_rewrites()
        || accounts_db.test_skip_rewrites_but_include_in_bank_hash
}

/// Returns how clean_accounts() should handle old storages
///
/// With ancient storages enabled, old storages are intentionally kept and
/// must be left alone; otherwise clean may reclaim them.
fn clean_accounts_old_storages_policy(&self) -> OldStoragesPolicy {
    match self.are_ancient_storages_enabled() {
        true => OldStoragesPolicy::Leave,
        false => OldStoragesPolicy::Clean,
    }
}

/// Acquires a read lock on the bank's cost tracker.
///
/// Returns `Err` only if the underlying `RwLock` is poisoned.
pub fn read_cost_tracker(&self) -> LockResult<RwLockReadGuard<CostTracker>> {
    self.cost_tracker.read()
}
Expand Down

0 comments on commit 970606e

Please sign in to comment.