Skip to content

Commit

Permalink
introduce scan_pubkeys for clean storage iteration (solana-labs#507)
Browse files Browse the repository at this point in the history
* introduce pubkey_iter for clean storage iteration

* rename scan_pubkeys

* pr feedback

* pr feedback
  • Loading branch information
jeffwashington authored Apr 1, 2024
1 parent 79e316e commit 620f565
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 4 deletions.
16 changes: 12 additions & 4 deletions accounts-db/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2904,8 +2904,8 @@ impl AccountsDb {
dirty_ancient_stores.fetch_add(1, Ordering::Relaxed);
}
oldest_dirty_slot = oldest_dirty_slot.min(*slot);
store.accounts.account_iter().for_each(|account| {
pubkeys.insert(*account.pubkey());
store.accounts.scan_pubkeys(|k| {
pubkeys.insert(*k);
});
});
oldest_dirty_slot
Expand Down Expand Up @@ -16664,11 +16664,19 @@ pub mod tests {
) -> Vec<(Pubkey, AccountSharedData)> {
storages
.flat_map(|storage| {
storage
let vec = storage
.accounts
.account_iter()
.map(|account| (*account.pubkey(), account.to_account_shared_data()))
.collect::<Vec<_>>()
.collect::<Vec<_>>();
// make sure scan_pubkeys results match
// Note that we assume traversals are both in the same order, but this doesn't have to be true.
let mut compare = Vec::default();
storage.accounts.scan_pubkeys(|k| {
compare.push(*k);
});
assert_eq!(compare, vec.iter().map(|(k, _)| *k).collect::<Vec<_>>());
vec
})
.collect::<Vec<_>>()
}
Expand Down
8 changes: 8 additions & 0 deletions accounts-db/src/accounts_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,14 @@ impl AccountsFile {
AccountsFileIter::new(self)
}

/// Invoke `callback` with each pubkey found in this accounts file.
///
/// For the `AppendVec` variant this simply forwards `callback` to the
/// append vec's own `scan_pubkeys`.
///
/// # Panics
///
/// Panics via `unimplemented!()` when called on the `TieredStorage` variant.
pub(crate) fn scan_pubkeys(&self, callback: impl FnMut(&Pubkey)) {
    // Resolve the backing storage first; only `AppendVec` supports scanning so far.
    let append_vec = match self {
        Self::AppendVec(append_vec) => append_vec,
        Self::TieredStorage(_) => unimplemented!(),
    };
    append_vec.scan_pubkeys(callback);
}

/// Return a vector of account metadata for each account, starting from `offset`.
pub fn accounts(&self, offset: usize) -> Vec<StoredAccountMeta> {
match self {
Expand Down
50 changes: 50 additions & 0 deletions accounts-db/src/append_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,15 @@ impl<'append_vec> ReadableAccount for AppendVecStoredAccountMeta<'append_vec> {
}
}

/// Offsets that help navigate the persisted format of `AppendVec`.
///
/// Describes where one stored account's data ends and where the account
/// stored after it begins.
#[derive(Debug)]
struct AccountOffsets {
/// Offset to the end of this account's `&[u8]` data.
/// Computed from the unaligned data length, so it is not rounded up.
offset_to_end_of_data: usize,
/// Offset to the next account. This will be aligned.
/// Computed from the aligned data length, so it is never smaller than
/// `offset_to_end_of_data` for the same account.
next_account_offset: usize,
}

/// A thread-safe, file-backed block of memory used to store `Account` instances. Append operations
/// are serialized such that only one thread updates the internal `append_lock` at a time. No
/// restrictions are placed on reading. That is, one may read items from one thread while another
Expand Down Expand Up @@ -552,6 +561,47 @@ impl AppendVec {
self.path.clone()
}

/// Compute the offsets that bound the account stored at `start_offset`.
///
/// In the on-disk layout of an `AppendVec`, each account is a run of fixed-size
/// headers (`StoredMeta`, `AccountMeta`, `AccountHash`) followed by its variable-sized
/// data. The account after it begins once the data length has been rounded up by
/// `u64_align!` (a 64 bit boundary).
///
/// Returning both the unaligned end of the data and the aligned start of the next
/// account lets callers step from account to account without reading the data itself.
///
/// NOTE(review): the additions and the `as usize` cast here are unchecked; this
/// presumably relies on `stored_meta.data_len` having been validated elsewhere — confirm.
fn next_account_offset(start_offset: usize, stored_meta: &StoredMeta) -> AccountOffsets {
    // total size of the fixed-size headers that precede the account data
    let header_len = std::mem::size_of::<StoredMeta>()
        + std::mem::size_of::<AccountMeta>()
        + std::mem::size_of::<AccountHash>();
    let data_start = start_offset + header_len;
    let data_len = stored_meta.data_len as usize;

    AccountOffsets {
        // the next account starts after the data, padded up to the alignment boundary
        next_account_offset: data_start + u64_align!(data_len),
        // the data itself ends at its exact, unpadded length
        offset_to_end_of_data: data_start + data_len,
    }
}

/// Walk the stored accounts from offset 0 and hand each pubkey to `callback`.
///
/// Only the `StoredMeta` header of each account is read; the remaining fields of
/// `StoredAccountMeta` are not deserialized and the account `data` is never touched.
/// Because the pubkeys are pushed to a callback, no references need to be kept alive
/// or returned through an iterator.
///
/// The scan stops at the first offset where a `StoredMeta` cannot be read, or at the
/// first account whose data would extend past `self.len()`; that account's pubkey is
/// not reported.
pub(crate) fn scan_pubkeys(&self, mut callback: impl FnMut(&Pubkey)) {
    let mut cursor = 0;
    // a failed header read means we have reached the end of the stored accounts
    while let Some((meta, _)) = self.get_type::<StoredMeta>(cursor) {
        let offsets = Self::next_account_offset(cursor, meta);
        if offsets.offset_to_end_of_data > self.len() {
            // the header is present but its data is not fully stored, so skip it and stop
            break;
        }
        callback(&meta.pubkey);
        cursor = offsets.next_account_offset;
    }
}

/// Return iterator for account metadata
pub fn account_iter(&self) -> AppendVecAccountsIter {
AppendVecAccountsIter::new(self)
Expand Down

0 comments on commit 620f565

Please sign in to comment.