Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for HamtV0 from forest(commit hash: b622af5a6) #1808

Merged
merged 18 commits into from
Aug 2, 2023
Merged
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions ipld/hamt/src/hamt.rs
Original file line number Diff line number Diff line change
@@ -15,7 +15,8 @@ use serde::{Serialize, Serializer};

use crate::hash_bits::HashBits;
use crate::node::Node;
use crate::{Config, Error, Hash, HashAlgorithm, Sha256};
use crate::pointer::version::Version;
use crate::{version, Config, Error, Hash, HashAlgorithm, Sha256};

/// Implementation of the HAMT data structure for IPLD.
///
@@ -33,21 +34,26 @@ use crate::{Config, Error, Hash, HashAlgorithm, Sha256};
/// assert_eq!(map.get::<_>(&1).unwrap(), None);
/// let cid = map.flush().unwrap();
/// ```
pub type Hamt<BS, V, K = BytesKey> = HamtImpl<BS, V, version::V3, K, Sha256>;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to continue to take H as a type parameter to avoid breaking the API.

/// Legacy amt V0
pub type Hamtv0<BS, V, K = BytesKey> = HamtImpl<BS, V, version::V0, K, Sha256>;

#[derive(Debug)]
pub struct Hamt<BS, V, K = BytesKey, H = Sha256> {
root: Node<K, V, H>,
pub struct HamtImpl<BS, V, Ver, K = BytesKey, H = Sha256> {
root: Node<K, V, Ver, H>,
store: BS,
conf: Config,
hash: PhantomData<H>,
/// Remember the last flushed CID until it changes.
flushed_cid: Option<Cid>,
}

impl<BS, V, K, H> Serialize for Hamt<BS, V, K, H>
impl<BS, V, Ver, K, H> Serialize for HamtImpl<BS, V, Ver, K, H>
where
K: Serialize,
V: Serialize,
H: HashAlgorithm,
Ver: Version,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -57,17 +63,20 @@ where
}
}

impl<K: PartialEq, V: PartialEq, S: Blockstore, H: HashAlgorithm> PartialEq for Hamt<S, V, K, H> {
impl<K: PartialEq, V: PartialEq, S: Blockstore, H: HashAlgorithm, Ver> PartialEq
for HamtImpl<S, V, Ver, K, H>
{
fn eq(&self, other: &Self) -> bool {
self.root == other.root
}
}

impl<BS, V, K, H> Hamt<BS, V, K, H>
impl<BS, V, Ver, K, H> HamtImpl<BS, V, Ver, K, H>
where
K: Hash + Eq + PartialOrd + Serialize + DeserializeOwned,
V: Serialize + DeserializeOwned,
BS: Blockstore,
Ver: Version,
H: HashAlgorithm,
{
pub fn new(store: BS) -> Self {
5 changes: 4 additions & 1 deletion ipld/hamt/src/lib.rs
Original file line number Diff line number Diff line change
@@ -23,12 +23,15 @@ pub use forest_hash_utils::{BytesKey, Hash};
use serde::{Deserialize, Serialize};

pub use self::error::Error;
pub use self::hamt::Hamt;
pub use self::hamt::{Hamt, Hamtv0};
pub use self::hash::*;
pub use self::hash_algorithm::*;
pub use self::pointer::version;

/// Default bit width for indexing a hash at each depth level
const DEFAULT_BIT_WIDTH: u32 = 8;
/// Default bit width for indexing a hash at each depth level for Hamt v0
pub const DEFAULT_BIT_WIDTH_V0: u32 = 5;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's make this private unless we need it to be public for some reason.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've removed this field due to #1828 and filecoin-project/builtin-actors#1346. Basically, the user should just pass the bit-width they need and I'm going to deprecate the "default" methods as they're useless.

As far as I can tell, the v0 bit-width was also 8 by default. We just set it to 5 everywhere.


/// Configuration options for a HAMT instance.
#[derive(Debug, Clone)]
34 changes: 19 additions & 15 deletions ipld/hamt/src/node.rs
Original file line number Diff line number Diff line change
@@ -17,26 +17,28 @@ use super::bitfield::Bitfield;
use super::hash_bits::HashBits;
use super::pointer::Pointer;
use super::{Error, Hash, HashAlgorithm, KeyValuePair};
use crate::pointer::version::Version;
use crate::Config;

/// Node in Hamt tree which contains bitfield of set indexes and pointers to nodes
#[derive(Debug)]
pub(crate) struct Node<K, V, H> {
pub(crate) struct Node<K, V, Ver, H> {
pub(crate) bitfield: Bitfield,
pub(crate) pointers: Vec<Pointer<K, V, H>>,
pub(crate) pointers: Vec<Pointer<K, V, Ver, H>>,
hash: PhantomData<H>,
}

impl<K: PartialEq, V: PartialEq, H> PartialEq for Node<K, V, H> {
impl<K: PartialEq, V: PartialEq, H, Ver> PartialEq for Node<K, V, H, Ver> {
fn eq(&self, other: &Self) -> bool {
(self.bitfield == other.bitfield) && (self.pointers == other.pointers)
}
}

impl<K, V, H> Serialize for Node<K, V, H>
impl<K, V, Ver, H> Serialize for Node<K, V, Ver, H>
where
K: Serialize,
V: Serialize,
Ver: self::Version,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -46,10 +48,11 @@ where
}
}

impl<'de, K, V, H> Deserialize<'de> for Node<K, V, H>
impl<'de, K, V, Ver, H> Deserialize<'de> for Node<K, V, Ver, H>
where
K: DeserializeOwned,
V: DeserializeOwned,
Ver: Version,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
@@ -64,7 +67,7 @@ where
}
}

impl<K, V, H> Default for Node<K, V, H> {
impl<K, V, H, Ver> Default for Node<K, V, H, Ver> {
fn default() -> Self {
Node {
bitfield: Bitfield::zero(),
@@ -74,11 +77,12 @@ impl<K, V, H> Default for Node<K, V, H> {
}
}

impl<K, V, H> Node<K, V, H>
impl<K, V, Ver, H> Node<K, V, Ver, H>
where
K: Hash + Eq + PartialOrd + Serialize + DeserializeOwned,
H: HashAlgorithm,
V: Serialize + DeserializeOwned,
Ver: Version,
{
pub fn set<S: Blockstore>(
&mut self,
@@ -318,7 +322,7 @@ where
// Link node is cached
cached_node
} else {
let node: Box<Node<K, V, H>> = if let Some(node) = store.get_cbor(cid)? {
let node: Box<Node<K, V, Ver, H>> = if let Some(node) = store.get_cbor(cid)? {
node
} else {
#[cfg(not(feature = "ignore-dead-links"))]
@@ -367,7 +371,7 @@ where
self.insert_child(idx, key, value);
} else {
// Need to insert some empty nodes reserved for links.
let mut sub = Node::<K, V, H>::default();
let mut sub = Node::<K, V, Ver, H>::default();
sub.modify_value(hashed_key, conf, depth + 1, key, value, store, overwrite)?;
self.insert_child_dirty(idx, Box::new(sub));
}
@@ -433,7 +437,7 @@ where
});

let consumed = hashed_key.consumed;
let mut sub = Node::<K, V, H>::default();
let mut sub = Node::<K, V, Ver, H>::default();
let modified = sub.modify_value(
hashed_key,
conf,
@@ -568,7 +572,7 @@ where
Ok(())
}

fn rm_child(&mut self, i: usize, idx: u32) -> Pointer<K, V, H> {
fn rm_child(&mut self, i: usize, idx: u32) -> Pointer<K, V, Ver, H> {
self.bitfield.clear_bit(idx);
self.pointers.remove(i)
}
@@ -579,7 +583,7 @@ where
self.pointers.insert(i, Pointer::from_key_value(key, value))
}

fn insert_child_dirty(&mut self, idx: u32, node: Box<Node<K, V, H>>) {
fn insert_child_dirty(&mut self, idx: u32, node: Box<Node<K, V, Ver, H>>) {
let i = self.index_for_bit_pos(idx);
self.bitfield.set_bit(idx);
self.pointers.insert(i, Pointer::Dirty(node))
@@ -591,19 +595,19 @@ where
mask.and(&self.bitfield).count_ones()
}

fn get_child_mut(&mut self, i: usize) -> &mut Pointer<K, V, H> {
fn get_child_mut(&mut self, i: usize) -> &mut Pointer<K, V, Ver, H> {
&mut self.pointers[i]
}

fn get_child(&self, i: usize) -> &Pointer<K, V, H> {
fn get_child(&self, i: usize) -> &Pointer<K, V, Ver, H> {
&self.pointers[i]
}

/// Clean after delete to retrieve canonical form.
///
/// Returns true if the child pointer is completely empty and can be removed,
/// which can happen if we artificially inserted nodes during insertion.
fn clean(child: &mut Pointer<K, V, H>, conf: &Config, depth: u32) -> Result<bool, Error> {
fn clean(child: &mut Pointer<K, V, Ver, H>, conf: &Config, depth: u32) -> Result<bool, Error> {
match child.clean(conf, depth) {
Ok(()) => Ok(false),
Err(Error::ZeroPointers) if depth < conf.min_data_depth => Ok(true),
111 changes: 97 additions & 14 deletions ipld/hamt/src/pointer.rs
Original file line number Diff line number Diff line change
@@ -15,18 +15,37 @@ use super::node::Node;
use super::{Error, Hash, HashAlgorithm, KeyValuePair};
use crate::Config;

pub mod version {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's make this crate-private if possible, or #[doc(hidden)] otherwise.

#[derive(PartialEq, Eq, Debug)]
pub struct V0;
#[derive(PartialEq, Eq, Debug)]
pub struct V3;

pub trait Version {
const NUMBER: usize;
}

impl Version for V0 {
const NUMBER: usize = 0;
}

impl Version for V3 {
const NUMBER: usize = 3;
}
}

/// Pointer to index values or a link to another child node.
#[derive(Debug)]
pub(crate) enum Pointer<K, V, H> {
pub(crate) enum Pointer<K, V, Ver, H> {
Values(Vec<KeyValuePair<K, V>>),
Link {
cid: Cid,
cache: OnceCell<Box<Node<K, V, H>>>,
cache: OnceCell<Box<Node<K, V, Ver, H>>>,
},
Dirty(Box<Node<K, V, H>>),
Dirty(Box<Node<K, V, Ver, H>>),
}

impl<K: PartialEq, V: PartialEq, H> PartialEq for Pointer<K, V, H> {
impl<K: PartialEq, V: PartialEq, H, Ver> PartialEq for Pointer<K, V, H, Ver> {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Pointer::Values(a), Pointer::Values(b)) => a == b,
@@ -37,25 +56,58 @@ impl<K: PartialEq, V: PartialEq, H> PartialEq for Pointer<K, V, H> {
}
}

mod pointer_v0 {
use cid::Cid;
use serde::Serialize;

use crate::KeyValuePair;

use super::Pointer;

#[derive(Serialize)]
pub(super) enum PointerSer<'a, K, V> {
Vals(&'a [KeyValuePair<K, V>]),
Link(&'a Cid),
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused at how this works. For v0, we want to serialize to a map of "0"/"1" to a cid/list of links. As far as I know, this here will serialize use "Vals" and "Link" as the keys?


impl<'a, K, V, Ver, H> TryFrom<&'a Pointer<K, V, Ver, H>> for PointerSer<'a, K, V> {
type Error = &'static str;

fn try_from(pointer: &'a Pointer<K, V, Ver, H>) -> Result<Self, Self::Error> {
match pointer {
Pointer::Values(vals) => Ok(PointerSer::Vals(vals.as_ref())),
Pointer::Link { cid, .. } => Ok(PointerSer::Link(cid)),
Pointer::Dirty(_) => Err("Cannot serialize cached values"),
}
}
}
}

/// Serialize the Pointer like an untagged enum.
impl<K, V, H> Serialize for Pointer<K, V, H>
impl<K, V, Ver, H> Serialize for Pointer<K, V, Ver, H>
where
K: Serialize,
V: Serialize,
Ver: self::version::Version,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match self {
Pointer::Values(vals) => vals.serialize(serializer),
Pointer::Link { cid, .. } => cid.serialize(serializer),
Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")),
match Ver::NUMBER {
0 => pointer_v0::PointerSer::try_from(self)
.map_err(ser::Error::custom)?
.serialize(serializer),
_ => match self {
Pointer::Values(vals) => vals.serialize(serializer),
Pointer::Link { cid, .. } => cid.serialize(serializer),
Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")),
},
}
}
}

impl<K, V, H> TryFrom<Ipld> for Pointer<K, V, H>
impl<K, V, Ver, H> TryFrom<Ipld> for Pointer<K, V, Ver, H>
where
K: DeserializeOwned,
V: DeserializeOwned,
@@ -81,26 +133,57 @@ where
}

/// Deserialize the Pointer like an untagged enum.
impl<'de, K, V, H> Deserialize<'de> for Pointer<K, V, H>
impl<'de, K, V, Ver, H> Deserialize<'de> for Pointer<K, V, Ver, H>
where
K: DeserializeOwned,
V: DeserializeOwned,
Ver: self::version::Version,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Ipld::deserialize(deserializer).and_then(|ipld| ipld.try_into().map_err(de::Error::custom))
match Ver::NUMBER {
0 => {
let ipld = Ipld::deserialize(deserializer)?;
let (_key, value) = match ipld {
Ipld::Map(map) => map
.into_iter()
.next()
.ok_or("Expected at least one element".to_string()),
other => Err(format!("Expected `Ipld::Map`, got {:#?}", other)),
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should decode this as a tagged enum instead of using Ipld and looking at the types.

#[derive(Deserialize, Serialize)]
enum PointerDe<K, V> {
    #[serde(name = "l")]
    Link(Cid),
    #[serde(name = "v")]
    Values(KeyValuePair<K, V>),
}

Copy link
Contributor Author

@creativcoder creativcoder Jul 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i had to change l and v to 0 and 1 respectively to read genesis header in forest and I'm able to boot forest daemon.
Changes made:

    #[derive(Serialize)]
    pub(super) enum PointerSer<'a, K, V> {
        #[serde(rename = "0")]
        Vals(&'a [KeyValuePair<K, V>]),
        #[serde(rename = "1")]
        Link(&'a Cid),
    }

    #[derive(Deserialize, Serialize)]
    pub enum PointerDe<K, V> {
        #[serde(rename = "0")]
        Link(Cid),
        #[serde(rename = "1")]
        Vals(Vec<KeyValuePair<K, V>>),
    }

But Hamtv0 tests in this repo fails with:

thread 'test_default::test_hamtv0' panicked at 'called `Result::unwrap()` on an `Err` value: Dynamic(Serialization error for Cbor protocol: TypeMismatch { name: "CBOR tag", byte: 1 })', ipld/hamt/tests/hamt_tests.rs:1022:69

with the above changes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh. I thought it was k/v. I must be thinking of IPFS HAMTs.

But Hamtv0 tests in this repo fails with:

Hm. Something funky is going on there. Can you push the code?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(or push it to a new branch)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've pushed the changes with failing hamtv0 tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was a sneaky bug. The Vals/Pointer variants were swapped.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah i see, interesting! thanks

.map_err(de::Error::custom)?;
match value {
ipld_list @ Ipld::List(_) => {
let values: Vec<KeyValuePair<K, V>> =
Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?;
Ok(Self::Values(values))
}
Ipld::Link(cid) => Ok(Self::Link {
cid,
cache: Default::default(),
}),
other => Err(format!(
"Expected `Ipld::List` or `Ipld::Link`, got {:#?}",
other
)),
}
.map_err(de::Error::custom)
}
_ => Ipld::deserialize(deserializer)
.and_then(|ipld| ipld.try_into().map_err(de::Error::custom)),
}
}
}

impl<K, V, H> Default for Pointer<K, V, H> {
impl<K, V, H, Ver> Default for Pointer<K, V, H, Ver> {
fn default() -> Self {
Pointer::Values(Vec::new())
}
}

impl<K, V, H> Pointer<K, V, H>
impl<K, V, Ver, H> Pointer<K, V, Ver, H>
where
K: Serialize + DeserializeOwned + Hash + PartialOrd,
V: Serialize + DeserializeOwned,
18 changes: 17 additions & 1 deletion ipld/hamt/tests/hamt_tests.rs
Original file line number Diff line number Diff line change
@@ -893,7 +893,8 @@ fn tstring(v: impl Display) -> BytesKey {
}

mod test_default {
use fvm_ipld_blockstore::tracking::BSStats;
use fvm_ipld_blockstore::{tracking::BSStats, MemoryBlockstore};
use fvm_ipld_hamt::{Config, Hamtv0, DEFAULT_BIT_WIDTH_V0};
use quickcheck_macros::quickcheck;

use crate::{CidChecker, HamtFactory, LimitedKeyOps, UniqueKeyValuePairs};
@@ -1007,6 +1008,21 @@ mod test_default {
super::clean_child_ordering(HamtFactory::default(), Some(stats), cids);
}

#[test]
fn test_hamtv0() {
let config = Config {
bit_width: DEFAULT_BIT_WIDTH_V0,
..Default::default()
};
let store = MemoryBlockstore::default();
let mut hamtv0: Hamtv0<_, _, usize> = Hamtv0::new_with_config(&store, config.clone());
hamtv0.set(1, "world".to_string()).unwrap();
assert_eq!(hamtv0.get(&1).unwrap(), Some(&"world".to_string()));
let c = hamtv0.flush().unwrap();
let new_hamt = Hamtv0::load_with_config(&c, &store, config).unwrap();
assert_eq!(hamtv0, new_hamt);
}

#[quickcheck]
fn prop_cid_indep_of_insert_order(kvs: UniqueKeyValuePairs<u8, i64>, seed: u64) -> bool {
super::prop_cid_indep_of_insert_order(HamtFactory::default(), kvs, seed)