From 45057b3ea9e9841ac0ebfd92b6c325c54c933054 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Wed, 5 Jul 2023 13:24:24 +0000 Subject: [PATCH 01/16] add support for hamtv0 from b622af5a6 --- fvm/src/init_actor.rs | 1 + ipld/hamt/src/hamt.rs | 40 +++++++++++++--- ipld/hamt/src/lib.rs | 1 + ipld/hamt/src/node.rs | 91 ++++++++++++++++++++--------------- ipld/hamt/src/pointer.rs | 100 +++++++++++++++++++++++++++++---------- 5 files changed, 164 insertions(+), 69 deletions(-) diff --git a/fvm/src/init_actor.rs b/fvm/src/init_actor.rs index fb599afd6..7c555e19c 100644 --- a/fvm/src/init_actor.rs +++ b/fvm/src/init_actor.rs @@ -23,6 +23,7 @@ use fvm_ipld_encoding::CborStore; use fvm_ipld_hamt::Hamt; use fvm_shared::address::{Address, Payload}; use fvm_shared::{ActorID, HAMT_BIT_WIDTH}; +// use fvm_ipld_hamt:: use crate::state_tree::{ActorState, StateTree}; diff --git a/ipld/hamt/src/hamt.rs b/ipld/hamt/src/hamt.rs index fbe3332d1..13c299041 100644 --- a/ipld/hamt/src/hamt.rs +++ b/ipld/hamt/src/hamt.rs @@ -15,7 +15,31 @@ use serde::{Serialize, Serializer}; use crate::hash_bits::HashBits; use crate::node::Node; -use crate::{Config, Error, Hash, HashAlgorithm, Sha256}; +use crate::pointer::version::Version; +use crate::{version, Config, Error, Hash, HashAlgorithm, Sha256}; + +// pub mod version { +// #[derive(PartialEq, Eq, Debug)] +// pub struct V0; +// #[derive(PartialEq, Eq, Debug)] +// pub struct V3; + +// pub trait Version { +// const NUMBER: usize; +// } + +// impl Version for V0 { +// const NUMBER: usize = 0; +// } + +// impl Version for V3 { +// const NUMBER: usize = 3; +// } +// } + +pub type Hamt = HamtImpl; +/// Legacy amt V0 +pub type Hamtv0 = HamtImpl; /// Implementation of the HAMT data structure for IPLD. /// @@ -34,8 +58,8 @@ use crate::{Config, Error, Hash, HashAlgorithm, Sha256}; /// let cid = map.flush().unwrap(); /// ``` #[derive(Debug)] -pub struct Hamt { - root: Node, +pub struct HamtImpl { + root: Node, store: BS, conf: Config, hash: PhantomData, @@ -43,11 +67,12 @@ pub struct Hamt { flushed_cid: Option, } -impl Serialize for Hamt +impl Serialize for HamtImpl where K: Serialize, V: Serialize, H: HashAlgorithm, + Ver: Version, { fn serialize(&self, serializer: S) -> Result where @@ -57,17 +82,20 @@ where } } -impl PartialEq for Hamt { +impl PartialEq + for HamtImpl +{ fn eq(&self, other: &Self) -> bool { self.root == other.root } } -impl Hamt +impl HamtImpl where K: Hash + Eq + PartialOrd + Serialize + DeserializeOwned, V: Serialize + DeserializeOwned, BS: Blockstore, + Ver: Version, H: HashAlgorithm, { pub fn new(store: BS) -> Self { diff --git a/ipld/hamt/src/lib.rs b/ipld/hamt/src/lib.rs index 2e8241bd8..adbad6aae 100644 --- a/ipld/hamt/src/lib.rs +++ b/ipld/hamt/src/lib.rs @@ -26,6 +26,7 @@ pub use self::error::Error; pub use self::hamt::Hamt; pub use self::hash::*; pub use self::hash_algorithm::*; +pub use self::pointer::version; /// Default bit width for indexing a hash at each depth level const DEFAULT_BIT_WIDTH: u32 = 8; diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index 81bad7d67..3fdff4792 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -15,25 +15,26 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use super::bitfield::Bitfield; use super::hash_bits::HashBits; -use super::pointer::Pointer; +use super::pointer::PointerImpl; use super::{Error, Hash, HashAlgorithm, KeyValuePair}; +use crate::pointer::version::Version; use crate::Config; /// Node in Hamt tree which contains bitfield of set indexes and pointers to nodes #[derive(Debug)] -pub(crate) struct Node { +pub(crate) struct Node { pub(crate) bitfield: Bitfield, - pub(crate) pointers: Vec>, + pub(crate) pointers: Vec>, hash: PhantomData, } -impl PartialEq for Node { +impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { (self.bitfield == other.bitfield) && (self.pointers == other.pointers) } } -impl Serialize for Node +impl Serialize for Node where K: Serialize, V: Serialize, @@ -46,10 +47,11 @@ where } } -impl<'de, K, V, H> Deserialize<'de> for Node +impl<'de, K, V, Ver, H> Deserialize<'de> for Node where K: DeserializeOwned, V: DeserializeOwned, + Ver: Version, { fn deserialize(deserializer: D) -> Result where @@ -64,7 +66,7 @@ where } } -impl Default for Node { +impl Default for Node { fn default() -> Self { Node { bitfield: Bitfield::zero(), @@ -74,11 +76,12 @@ impl Default for Node { } } -impl Node +impl Node where K: Hash + Eq + PartialOrd + Serialize + DeserializeOwned, H: HashAlgorithm, V: Serialize + DeserializeOwned, + Ver: Version, { pub fn set( &mut self, @@ -144,7 +147,7 @@ where { for p in &self.pointers { match p { - Pointer::Link { cid, cache } => { + PointerImpl::Link { cid, cache } => { if let Some(cached_node) = cache.get() { cached_node.for_each(store, f)? } else { @@ -163,12 +166,13 @@ where cache_node.for_each(store, f)? } } - Pointer::Dirty(node) => node.for_each(store, f)?, - Pointer::Values(kvs) => { + PointerImpl::Dirty(node) => node.for_each(store, f)?, + PointerImpl::Values(kvs) => { for kv in kvs { f(kv.0.borrow(), kv.1.borrow())?; } } + PointerImpl::Phantom(_) => unreachable!(), } } Ok(()) @@ -202,7 +206,7 @@ where // skip exploration of subtrees that are before the subtree which contains the cursor for p in &self.pointers[cindex..] { match p { - Pointer::Link { cid, cache } => { + PointerImpl::Link { cid, cache } => { if let Some(cached_node) = cache.get() { let (traversed, key) = cached_node.for_each_ranged( store, @@ -241,7 +245,7 @@ where } } } - Pointer::Dirty(node) => { + PointerImpl::Dirty(node) => { let (traversed, key) = node.for_each_ranged( store, conf, @@ -254,7 +258,7 @@ where return Ok((traversed_count, key)); } } - Pointer::Values(kvs) => { + PointerImpl::Values(kvs) => { for kv in kvs { if limit.map_or(false, |l| traversed_count == l) { // we have already found all requested items, return the key of the next item @@ -271,6 +275,7 @@ where } } } + PointerImpl::Phantom(_) => unreachable!(), } } @@ -313,12 +318,12 @@ where let child = self.get_child(cindex); let node = match child { - Pointer::Link { cid, cache } => { + PointerImpl::Link { cid, cache } => { if let Some(cached_node) = cache.get() { // Link node is cached cached_node } else { - let node: Box> = if let Some(node) = store.get_cbor(cid)? { + let node: Box> = if let Some(node) = store.get_cbor(cid)? { node } else { #[cfg(not(feature = "ignore-dead-links"))] @@ -331,10 +336,11 @@ where cache.get_or_init(|| node) } } - Pointer::Dirty(node) => node, - Pointer::Values(vals) => { + PointerImpl::Dirty(node) => node, + PointerImpl::Values(vals) => { return Ok(vals.iter().find(|kv| key.eq(kv.key().borrow()))); } + PointerImpl::Phantom(_) => unreachable!(), }; node.get_value(hashed_key, conf, key, store) @@ -367,7 +373,7 @@ where self.insert_child(idx, key, value); } else { // Need to insert some empty nodes reserved for links. - let mut sub = Node::::default(); + let mut sub = Node::::default(); sub.modify_value(hashed_key, conf, depth + 1, key, value, store, overwrite)?; self.insert_child_dirty(idx, Box::new(sub)); } @@ -378,7 +384,7 @@ where let child = self.get_child_mut(cindex); match child { - Pointer::Link { cid, cache } => { + PointerImpl::Link { cid, cache } => { cache.get_or_try_init(|| { store .get_cbor(cid)? @@ -396,14 +402,15 @@ where overwrite, )?; if modified { - *child = Pointer::Dirty(std::mem::take(child_node)); + *child = PointerImpl::Dirty(std::mem::take(child_node)); } Ok((old, modified)) } - Pointer::Dirty(node) => { + PointerImpl::Dirty(node) => { node.modify_value(hashed_key, conf, depth + 1, key, value, store, overwrite) } - Pointer::Values(vals) => { + PointerImpl::Phantom(_) => unreachable!(), + PointerImpl::Values(vals) => { // Update, if the key already exists. if let Some(i) = vals.iter().position(|p| p.key() == &key) { if overwrite { @@ -433,7 +440,7 @@ where }); let consumed = hashed_key.consumed; - let mut sub = Node::::default(); + let mut sub = Node::::default(); let modified = sub.modify_value( hashed_key, conf, @@ -456,7 +463,7 @@ where )?; } - *child = Pointer::Dirty(Box::new(sub)); + *child = PointerImpl::Dirty(Box::new(sub)); return Ok(modified); } @@ -497,7 +504,7 @@ where let child = self.get_child_mut(cindex); match child { - Pointer::Link { cid, cache } => { + PointerImpl::Link { cid, cache } => { cache.get_or_try_init(|| { store .get_cbor(cid)? @@ -508,7 +515,7 @@ where let deleted = child_node.rm_value(hashed_key, conf, depth + 1, key, store)?; if deleted.is_some() { - *child = Pointer::Dirty(std::mem::take(child_node)); + *child = PointerImpl::Dirty(std::mem::take(child_node)); if Self::clean(child, conf, depth)? { self.rm_child(cindex, idx); } @@ -516,7 +523,7 @@ where Ok(deleted) } - Pointer::Dirty(node) => { + PointerImpl::Dirty(node) => { // Delete value and return deleted value let deleted = node.rm_value(hashed_key, conf, depth + 1, key, store)?; @@ -526,12 +533,12 @@ where Ok(deleted) } - Pointer::Values(vals) => { + PointerImpl::Values(vals) => { // Delete value for (i, p) in vals.iter().enumerate() { if key.eq(p.key().borrow()) { let old = if vals.len() == 1 { - if let Pointer::Values(new_v) = self.rm_child(cindex, idx) { + if let PointerImpl::Values(new_v) = self.rm_child(cindex, idx) { new_v.into_iter().next().unwrap() } else { unreachable!() @@ -545,12 +552,13 @@ where Ok(None) } + PointerImpl::Phantom(_) => unreachable!(), } } pub fn flush(&mut self, store: &S) -> Result<(), Error> { for pointer in &mut self.pointers { - if let Pointer::Dirty(node) = pointer { + if let PointerImpl::Dirty(node) = pointer { // Flush cached sub node to clear it's cache node.flush(store)?; @@ -561,14 +569,14 @@ where let cache = OnceCell::from(std::mem::take(node)); // Replace cached node with Cid link - *pointer = Pointer::Link { cid, cache }; + *pointer = PointerImpl::Link { cid, cache }; } } Ok(()) } - fn rm_child(&mut self, i: usize, idx: u32) -> Pointer { + fn rm_child(&mut self, i: usize, idx: u32) -> PointerImpl { self.bitfield.clear_bit(idx); self.pointers.remove(i) } @@ -576,13 +584,14 @@ where fn insert_child(&mut self, idx: u32, key: K, value: V) { let i = self.index_for_bit_pos(idx); self.bitfield.set_bit(idx); - self.pointers.insert(i, Pointer::from_key_value(key, value)) + self.pointers + .insert(i, PointerImpl::from_key_value(key, value)) } - fn insert_child_dirty(&mut self, idx: u32, node: Box>) { + fn insert_child_dirty(&mut self, idx: u32, node: Box>) { let i = self.index_for_bit_pos(idx); self.bitfield.set_bit(idx); - self.pointers.insert(i, Pointer::Dirty(node)) + self.pointers.insert(i, PointerImpl::Dirty(node)) } fn index_for_bit_pos(&self, bp: u32) -> usize { @@ -591,11 +600,11 @@ where mask.and(&self.bitfield).count_ones() } - fn get_child_mut(&mut self, i: usize) -> &mut Pointer { + fn get_child_mut(&mut self, i: usize) -> &mut PointerImpl { &mut self.pointers[i] } - fn get_child(&self, i: usize) -> &Pointer { + fn get_child(&self, i: usize) -> &PointerImpl { &self.pointers[i] } @@ -603,7 +612,11 @@ where /// /// Returns true if the child pointer is completely empty and can be removed, /// which can happen if we artificially inserted nodes during insertion. - fn clean(child: &mut Pointer, conf: &Config, depth: u32) -> Result { + fn clean( + child: &mut PointerImpl, + conf: &Config, + depth: u32, + ) -> Result { match child.clean(conf, depth) { Ok(()) => Ok(false), Err(Error::ZeroPointers) if depth < conf.min_data_depth => Ok(true), diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index ef4444619..09d7826de 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -15,30 +15,50 @@ use super::node::Node; use super::{Error, Hash, HashAlgorithm, KeyValuePair}; use crate::Config; +pub mod version { + #[derive(PartialEq, Eq, Debug)] + pub struct V0; + #[derive(PartialEq, Eq, Debug)] + pub struct V3; + + pub trait Version { + const NUMBER: usize; + } + + impl Version for V0 { + const NUMBER: usize = 0; + } + + impl Version for V3 { + const NUMBER: usize = 3; + } +} + /// Pointer to index values or a link to another child node. #[derive(Debug)] -pub(crate) enum Pointer { +pub(crate) enum PointerImpl { Values(Vec>), Link { cid: Cid, - cache: OnceCell>>, + cache: OnceCell>>, }, - Dirty(Box>), + Dirty(Box>), + Phantom(std::marker::PhantomData), } -impl PartialEq for Pointer { +impl PartialEq for PointerImpl { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Pointer::Values(a), Pointer::Values(b)) => a == b, - (Pointer::Link { cid: a, .. }, Pointer::Link { cid: b, .. }) => a == b, - (Pointer::Dirty(a), Pointer::Dirty(b)) => a == b, + (PointerImpl::Values(a), PointerImpl::Values(b)) => a == b, + (PointerImpl::Link { cid: a, .. }, PointerImpl::Link { cid: b, .. }) => a == b, + (PointerImpl::Dirty(a), PointerImpl::Dirty(b)) => a == b, _ => false, } } } /// Serialize the Pointer like an untagged enum. -impl Serialize for Pointer +impl Serialize for PointerImpl where K: Serialize, V: Serialize, @@ -48,14 +68,15 @@ where S: Serializer, { match self { - Pointer::Values(vals) => vals.serialize(serializer), - Pointer::Link { cid, .. } => cid.serialize(serializer), - Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), + PointerImpl::Values(vals) => vals.serialize(serializer), + PointerImpl::Link { cid, .. } => cid.serialize(serializer), + PointerImpl::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), + PointerImpl::Phantom(_) => unreachable!(), } } } -impl TryFrom for Pointer +impl TryFrom for PointerImpl where K: DeserializeOwned, V: DeserializeOwned, @@ -81,40 +102,71 @@ where } /// Deserialize the Pointer like an untagged enum. -impl<'de, K, V, H> Deserialize<'de> for Pointer +impl<'de, K, V, Ver, H> Deserialize<'de> for PointerImpl where K: DeserializeOwned, V: DeserializeOwned, + Ver: self::version::Version, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { - Ipld::deserialize(deserializer).and_then(|ipld| ipld.try_into().map_err(de::Error::custom)) + match Ver::NUMBER { + 0 => { + let ipld = Ipld::deserialize(deserializer)?; + let (_key, value) = match ipld { + Ipld::Map(map) => map + .into_iter() + .next() + .ok_or("Expected at least one element".to_string()), + other => Err(format!("Expected `Ipld::Map`, got {:#?}", other)), + } + .map_err(de::Error::custom)?; + match value { + ipld_list @ Ipld::List(_) => { + let values: Vec> = + Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?; + Ok(Self::Values(values)) + } + Ipld::Link(cid) => Ok(Self::Link { + cid, + cache: Default::default(), + }), + other => Err(format!( + "Expected `Ipld::List` or `Ipld::Link`, got {:#?}", + other + )), + } + .map_err(de::Error::custom) + } + _ => Ipld::deserialize(deserializer) + .and_then(|ipld| ipld.try_into().map_err(de::Error::custom)), + } } } -impl Default for Pointer { +impl Default for PointerImpl { fn default() -> Self { - Pointer::Values(Vec::new()) + PointerImpl::Values(Vec::new()) } } -impl Pointer +impl PointerImpl where K: Serialize + DeserializeOwned + Hash + PartialOrd, V: Serialize + DeserializeOwned, H: HashAlgorithm, { pub(crate) fn from_key_value(key: K, value: V) -> Self { - Pointer::Values(vec![KeyValuePair::new(key, value)]) + PointerImpl::Values(vec![KeyValuePair::new(key, value)]) } /// Internal method to cleanup children, to ensure consistent tree representation /// after deletes. pub(crate) fn clean(&mut self, conf: &Config, depth: u32) -> Result<(), Error> { match self { - Pointer::Dirty(n) => match n.pointers.len() { + PointerImpl::Dirty(n) => match n.pointers.len() { 0 => Err(Error::ZeroPointers), _ if depth < conf.min_data_depth => { // We are in the shallows where we don't want key-value pairs, just links, @@ -124,12 +176,12 @@ where } 1 => { // Node has only one pointer, swap with parent node - if let Pointer::Values(vals) = &mut n.pointers[0] { + if let PointerImpl::Values(vals) = &mut n.pointers[0] { // Take child values, to ensure canonical ordering let values = std::mem::take(vals); // move parent node up - *self = Pointer::Values(values) + *self = PointerImpl::Values(values) } Ok(()) } @@ -137,7 +189,7 @@ where // If more child values than max width, nothing to change. let mut children_len = 0; for c in n.pointers.iter() { - if let Pointer::Values(vals) = c { + if let PointerImpl::Values(vals) = c { children_len += vals.len(); } else { return Ok(()); @@ -152,7 +204,7 @@ where .pointers .iter_mut() .filter_map(|p| { - if let Pointer::Values(kvs) = p { + if let PointerImpl::Values(kvs) = p { Some(std::mem::take(kvs)) } else { None @@ -168,7 +220,7 @@ where }); // Replace link node with child values - *self = Pointer::Values(child_vals); + *self = PointerImpl::Values(child_vals); Ok(()) } _ => Ok(()), From 8e057e4d767549de227ec6d544713613c913e03b Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Wed, 5 Jul 2023 13:47:29 +0000 Subject: [PATCH 02/16] cleanups --- fvm/src/init_actor.rs | 1 - ipld/hamt/src/hamt.rs | 27 +++------------ ipld/hamt/src/node.rs | 73 +++++++++++++++++++--------------------- ipld/hamt/src/pointer.rs | 44 ++++++++++++------------ 4 files changed, 60 insertions(+), 85 deletions(-) diff --git a/fvm/src/init_actor.rs b/fvm/src/init_actor.rs index 7c555e19c..fb599afd6 100644 --- a/fvm/src/init_actor.rs +++ b/fvm/src/init_actor.rs @@ -23,7 +23,6 @@ use fvm_ipld_encoding::CborStore; use fvm_ipld_hamt::Hamt; use fvm_shared::address::{Address, Payload}; use fvm_shared::{ActorID, HAMT_BIT_WIDTH}; -// use fvm_ipld_hamt:: use crate::state_tree::{ActorState, StateTree}; diff --git a/ipld/hamt/src/hamt.rs b/ipld/hamt/src/hamt.rs index 13c299041..0ff004717 100644 --- a/ipld/hamt/src/hamt.rs +++ b/ipld/hamt/src/hamt.rs @@ -18,29 +18,6 @@ use crate::node::Node; use crate::pointer::version::Version; use crate::{version, Config, Error, Hash, HashAlgorithm, Sha256}; -// pub mod version { -// #[derive(PartialEq, Eq, Debug)] -// pub struct V0; -// #[derive(PartialEq, Eq, Debug)] -// pub struct V3; - -// pub trait Version { -// const NUMBER: usize; -// } - -// impl Version for V0 { -// const NUMBER: usize = 0; -// } - -// impl Version for V3 { -// const NUMBER: usize = 3; -// } -// } - -pub type Hamt = HamtImpl; -/// Legacy amt V0 -pub type Hamtv0 = HamtImpl; - /// Implementation of the HAMT data structure for IPLD. /// /// # Examples @@ -57,6 +34,10 @@ pub type Hamtv0 = HamtImpl; /// assert_eq!(map.get::<_>(&1).unwrap(), None); /// let cid = map.flush().unwrap(); /// ``` +pub type Hamt = HamtImpl; +/// Legacy amt V0 +pub type Hamtv0 = HamtImpl; + #[derive(Debug)] pub struct HamtImpl { root: Node, diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index 3fdff4792..4635dd03e 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -15,7 +15,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use super::bitfield::Bitfield; use super::hash_bits::HashBits; -use super::pointer::PointerImpl; +use super::pointer::Pointer; use super::{Error, Hash, HashAlgorithm, KeyValuePair}; use crate::pointer::version::Version; use crate::Config; @@ -24,7 +24,7 @@ use crate::Config; #[derive(Debug)] pub(crate) struct Node { pub(crate) bitfield: Bitfield, - pub(crate) pointers: Vec>, + pub(crate) pointers: Vec>, hash: PhantomData, } @@ -147,7 +147,7 @@ where { for p in &self.pointers { match p { - PointerImpl::Link { cid, cache } => { + Pointer::Link { cid, cache } => { if let Some(cached_node) = cache.get() { cached_node.for_each(store, f)? } else { @@ -166,13 +166,13 @@ where cache_node.for_each(store, f)? } } - PointerImpl::Dirty(node) => node.for_each(store, f)?, - PointerImpl::Values(kvs) => { + Pointer::Dirty(node) => node.for_each(store, f)?, + Pointer::Values(kvs) => { for kv in kvs { f(kv.0.borrow(), kv.1.borrow())?; } } - PointerImpl::Phantom(_) => unreachable!(), + Pointer::Phantom(_) => unreachable!(), } } Ok(()) @@ -206,7 +206,7 @@ where // skip exploration of subtrees that are before the subtree which contains the cursor for p in &self.pointers[cindex..] { match p { - PointerImpl::Link { cid, cache } => { + Pointer::Link { cid, cache } => { if let Some(cached_node) = cache.get() { let (traversed, key) = cached_node.for_each_ranged( store, @@ -245,7 +245,7 @@ where } } } - PointerImpl::Dirty(node) => { + Pointer::Dirty(node) => { let (traversed, key) = node.for_each_ranged( store, conf, @@ -258,7 +258,7 @@ where return Ok((traversed_count, key)); } } - PointerImpl::Values(kvs) => { + Pointer::Values(kvs) => { for kv in kvs { if limit.map_or(false, |l| traversed_count == l) { // we have already found all requested items, return the key of the next item @@ -275,7 +275,7 @@ where } } } - PointerImpl::Phantom(_) => unreachable!(), + Pointer::Phantom(_) => unreachable!(), } } @@ -318,7 +318,7 @@ where let child = self.get_child(cindex); let node = match child { - PointerImpl::Link { cid, cache } => { + Pointer::Link { cid, cache } => { if let Some(cached_node) = cache.get() { // Link node is cached cached_node @@ -336,11 +336,11 @@ where cache.get_or_init(|| node) } } - PointerImpl::Dirty(node) => node, - PointerImpl::Values(vals) => { + Pointer::Dirty(node) => node, + Pointer::Values(vals) => { return Ok(vals.iter().find(|kv| key.eq(kv.key().borrow()))); } - PointerImpl::Phantom(_) => unreachable!(), + Pointer::Phantom(_) => unreachable!(), }; node.get_value(hashed_key, conf, key, store) @@ -384,7 +384,7 @@ where let child = self.get_child_mut(cindex); match child { - PointerImpl::Link { cid, cache } => { + Pointer::Link { cid, cache } => { cache.get_or_try_init(|| { store .get_cbor(cid)? @@ -402,15 +402,15 @@ where overwrite, )?; if modified { - *child = PointerImpl::Dirty(std::mem::take(child_node)); + *child = Pointer::Dirty(std::mem::take(child_node)); } Ok((old, modified)) } - PointerImpl::Dirty(node) => { + Pointer::Dirty(node) => { node.modify_value(hashed_key, conf, depth + 1, key, value, store, overwrite) } - PointerImpl::Phantom(_) => unreachable!(), - PointerImpl::Values(vals) => { + Pointer::Phantom(_) => unreachable!(), + Pointer::Values(vals) => { // Update, if the key already exists. if let Some(i) = vals.iter().position(|p| p.key() == &key) { if overwrite { @@ -463,7 +463,7 @@ where )?; } - *child = PointerImpl::Dirty(Box::new(sub)); + *child = Pointer::Dirty(Box::new(sub)); return Ok(modified); } @@ -504,7 +504,7 @@ where let child = self.get_child_mut(cindex); match child { - PointerImpl::Link { cid, cache } => { + Pointer::Link { cid, cache } => { cache.get_or_try_init(|| { store .get_cbor(cid)? @@ -515,7 +515,7 @@ where let deleted = child_node.rm_value(hashed_key, conf, depth + 1, key, store)?; if deleted.is_some() { - *child = PointerImpl::Dirty(std::mem::take(child_node)); + *child = Pointer::Dirty(std::mem::take(child_node)); if Self::clean(child, conf, depth)? { self.rm_child(cindex, idx); } @@ -523,7 +523,7 @@ where Ok(deleted) } - PointerImpl::Dirty(node) => { + Pointer::Dirty(node) => { // Delete value and return deleted value let deleted = node.rm_value(hashed_key, conf, depth + 1, key, store)?; @@ -533,12 +533,12 @@ where Ok(deleted) } - PointerImpl::Values(vals) => { + Pointer::Values(vals) => { // Delete value for (i, p) in vals.iter().enumerate() { if key.eq(p.key().borrow()) { let old = if vals.len() == 1 { - if let PointerImpl::Values(new_v) = self.rm_child(cindex, idx) { + if let Pointer::Values(new_v) = self.rm_child(cindex, idx) { new_v.into_iter().next().unwrap() } else { unreachable!() @@ -552,13 +552,13 @@ where Ok(None) } - PointerImpl::Phantom(_) => unreachable!(), + Pointer::Phantom(_) => unreachable!(), } } pub fn flush(&mut self, store: &S) -> Result<(), Error> { for pointer in &mut self.pointers { - if let PointerImpl::Dirty(node) = pointer { + if let Pointer::Dirty(node) = pointer { // Flush cached sub node to clear it's cache node.flush(store)?; @@ -569,14 +569,14 @@ where let cache = OnceCell::from(std::mem::take(node)); // Replace cached node with Cid link - *pointer = PointerImpl::Link { cid, cache }; + *pointer = Pointer::Link { cid, cache }; } } Ok(()) } - fn rm_child(&mut self, i: usize, idx: u32) -> PointerImpl { + fn rm_child(&mut self, i: usize, idx: u32) -> Pointer { self.bitfield.clear_bit(idx); self.pointers.remove(i) } @@ -584,14 +584,13 @@ where fn insert_child(&mut self, idx: u32, key: K, value: V) { let i = self.index_for_bit_pos(idx); self.bitfield.set_bit(idx); - self.pointers - .insert(i, PointerImpl::from_key_value(key, value)) + self.pointers.insert(i, Pointer::from_key_value(key, value)) } fn insert_child_dirty(&mut self, idx: u32, node: Box>) { let i = self.index_for_bit_pos(idx); self.bitfield.set_bit(idx); - self.pointers.insert(i, PointerImpl::Dirty(node)) + self.pointers.insert(i, Pointer::Dirty(node)) } fn index_for_bit_pos(&self, bp: u32) -> usize { @@ -600,11 +599,11 @@ where mask.and(&self.bitfield).count_ones() } - fn get_child_mut(&mut self, i: usize) -> &mut PointerImpl { + fn get_child_mut(&mut self, i: usize) -> &mut Pointer { &mut self.pointers[i] } - fn get_child(&self, i: usize) -> &PointerImpl { + fn get_child(&self, i: usize) -> &Pointer { &self.pointers[i] } @@ -612,11 +611,7 @@ where /// /// Returns true if the child pointer is completely empty and can be removed, /// which can happen if we artificially inserted nodes during insertion. - fn clean( - child: &mut PointerImpl, - conf: &Config, - depth: u32, - ) -> Result { + fn clean(child: &mut Pointer, conf: &Config, depth: u32) -> Result { match child.clean(conf, depth) { Ok(()) => Ok(false), Err(Error::ZeroPointers) if depth < conf.min_data_depth => Ok(true), diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index 09d7826de..e449a2ca9 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -36,7 +36,7 @@ pub mod version { /// Pointer to index values or a link to another child node. #[derive(Debug)] -pub(crate) enum PointerImpl { +pub(crate) enum Pointer { Values(Vec>), Link { cid: Cid, @@ -46,19 +46,19 @@ pub(crate) enum PointerImpl { Phantom(std::marker::PhantomData), } -impl PartialEq for PointerImpl { +impl PartialEq for Pointer { fn eq(&self, other: &Self) -> bool { match (self, other) { - (PointerImpl::Values(a), PointerImpl::Values(b)) => a == b, - (PointerImpl::Link { cid: a, .. }, PointerImpl::Link { cid: b, .. }) => a == b, - (PointerImpl::Dirty(a), PointerImpl::Dirty(b)) => a == b, + (Pointer::Values(a), Pointer::Values(b)) => a == b, + (Pointer::Link { cid: a, .. }, Pointer::Link { cid: b, .. }) => a == b, + (Pointer::Dirty(a), Pointer::Dirty(b)) => a == b, _ => false, } } } /// Serialize the Pointer like an untagged enum. -impl Serialize for PointerImpl +impl Serialize for Pointer where K: Serialize, V: Serialize, @@ -68,15 +68,15 @@ where S: Serializer, { match self { - PointerImpl::Values(vals) => vals.serialize(serializer), - PointerImpl::Link { cid, .. } => cid.serialize(serializer), - PointerImpl::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), - PointerImpl::Phantom(_) => unreachable!(), + Pointer::Values(vals) => vals.serialize(serializer), + Pointer::Link { cid, .. } => cid.serialize(serializer), + Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), + Pointer::Phantom(_) => unreachable!(), } } } -impl TryFrom for PointerImpl +impl TryFrom for Pointer where K: DeserializeOwned, V: DeserializeOwned, @@ -102,7 +102,7 @@ where } /// Deserialize the Pointer like an untagged enum. -impl<'de, K, V, Ver, H> Deserialize<'de> for PointerImpl +impl<'de, K, V, Ver, H> Deserialize<'de> for Pointer where K: DeserializeOwned, V: DeserializeOwned, @@ -146,27 +146,27 @@ where } } -impl Default for PointerImpl { +impl Default for Pointer { fn default() -> Self { - PointerImpl::Values(Vec::new()) + Pointer::Values(Vec::new()) } } -impl PointerImpl +impl Pointer where K: Serialize + DeserializeOwned + Hash + PartialOrd, V: Serialize + DeserializeOwned, H: HashAlgorithm, { pub(crate) fn from_key_value(key: K, value: V) -> Self { - PointerImpl::Values(vec![KeyValuePair::new(key, value)]) + Pointer::Values(vec![KeyValuePair::new(key, value)]) } /// Internal method to cleanup children, to ensure consistent tree representation /// after deletes. pub(crate) fn clean(&mut self, conf: &Config, depth: u32) -> Result<(), Error> { match self { - PointerImpl::Dirty(n) => match n.pointers.len() { + Pointer::Dirty(n) => match n.pointers.len() { 0 => Err(Error::ZeroPointers), _ if depth < conf.min_data_depth => { // We are in the shallows where we don't want key-value pairs, just links, @@ -176,12 +176,12 @@ where } 1 => { // Node has only one pointer, swap with parent node - if let PointerImpl::Values(vals) = &mut n.pointers[0] { + if let Pointer::Values(vals) = &mut n.pointers[0] { // Take child values, to ensure canonical ordering let values = std::mem::take(vals); // move parent node up - *self = PointerImpl::Values(values) + *self = Pointer::Values(values) } Ok(()) } @@ -189,7 +189,7 @@ where // If more child values than max width, nothing to change. let mut children_len = 0; for c in n.pointers.iter() { - if let PointerImpl::Values(vals) = c { + if let Pointer::Values(vals) = c { children_len += vals.len(); } else { return Ok(()); @@ -204,7 +204,7 @@ where .pointers .iter_mut() .filter_map(|p| { - if let PointerImpl::Values(kvs) = p { + if let Pointer::Values(kvs) = p { Some(std::mem::take(kvs)) } else { None @@ -220,7 +220,7 @@ where }); // Replace link node with child values - *self = PointerImpl::Values(child_vals); + *self = Pointer::Values(child_vals); Ok(()) } _ => Ok(()), From c623b6c0bd6f1120aa684003fb05ca577572b112 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Wed, 5 Jul 2023 14:27:10 +0000 Subject: [PATCH 03/16] pub use hamtv0 --- ipld/hamt/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipld/hamt/src/lib.rs b/ipld/hamt/src/lib.rs index adbad6aae..144f58d00 100644 --- a/ipld/hamt/src/lib.rs +++ b/ipld/hamt/src/lib.rs @@ -23,7 +23,7 @@ pub use forest_hash_utils::{BytesKey, Hash}; use serde::{Deserialize, Serialize}; pub use self::error::Error; -pub use self::hamt::Hamt; +pub use self::hamt::{Hamt, Hamtv0}; pub use self::hash::*; pub use self::hash_algorithm::*; pub use self::pointer::version; From 4af6feb99c8db171eeb2cfccedca361306317790 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Thu, 6 Jul 2023 08:21:44 +0000 Subject: [PATCH 04/16] update type alias for K --- ipld/hamt/src/hamt.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ipld/hamt/src/hamt.rs b/ipld/hamt/src/hamt.rs index 0ff004717..20c32e161 100644 --- a/ipld/hamt/src/hamt.rs +++ b/ipld/hamt/src/hamt.rs @@ -4,6 +4,7 @@ use std::borrow::Borrow; use std::marker::PhantomData; +use std::str::Bytes; use cid::Cid; use forest_hash_utils::BytesKey; @@ -36,7 +37,7 @@ use crate::{version, Config, Error, Hash, HashAlgorithm, Sha256}; /// ``` pub type Hamt = HamtImpl; /// Legacy amt V0 -pub type Hamtv0 = HamtImpl; +pub type Hamtv0 = HamtImpl; #[derive(Debug)] pub struct HamtImpl { From 75da1de670fef7e85e72b5682c0b93ac42c4e2a1 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Tue, 11 Jul 2023 14:39:25 +0000 Subject: [PATCH 05/16] rm unneeded phantom and fix type alias --- ipld/hamt/src/hamt.rs | 3 +-- ipld/hamt/src/node.rs | 5 ----- ipld/hamt/src/pointer.rs | 2 -- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/ipld/hamt/src/hamt.rs b/ipld/hamt/src/hamt.rs index 20c32e161..2ae1348a8 100644 --- a/ipld/hamt/src/hamt.rs +++ b/ipld/hamt/src/hamt.rs @@ -4,7 +4,6 @@ use std::borrow::Borrow; use std::marker::PhantomData; -use std::str::Bytes; use cid::Cid; use forest_hash_utils::BytesKey; @@ -37,7 +36,7 @@ use crate::{version, Config, Error, Hash, HashAlgorithm, Sha256}; /// ``` pub type Hamt = HamtImpl; /// Legacy amt V0 -pub type Hamtv0 = HamtImpl; +pub type Hamtv0 = HamtImpl; #[derive(Debug)] pub struct HamtImpl { diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index 4635dd03e..179a2d45b 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -172,7 +172,6 @@ where f(kv.0.borrow(), kv.1.borrow())?; } } - Pointer::Phantom(_) => unreachable!(), } } Ok(()) @@ -275,7 +274,6 @@ where } } } - Pointer::Phantom(_) => unreachable!(), } } @@ -340,7 +338,6 @@ where Pointer::Values(vals) => { return Ok(vals.iter().find(|kv| key.eq(kv.key().borrow()))); } - Pointer::Phantom(_) => unreachable!(), }; node.get_value(hashed_key, conf, key, store) @@ -409,7 +406,6 @@ where Pointer::Dirty(node) => { node.modify_value(hashed_key, conf, depth + 1, key, value, store, overwrite) } - Pointer::Phantom(_) => unreachable!(), Pointer::Values(vals) => { // Update, if the key already exists. if let Some(i) = vals.iter().position(|p| p.key() == &key) { @@ -552,7 +548,6 @@ where Ok(None) } - Pointer::Phantom(_) => unreachable!(), } } diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index e449a2ca9..ad33bffaa 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -43,7 +43,6 @@ pub(crate) enum Pointer { cache: OnceCell>>, }, Dirty(Box>), - Phantom(std::marker::PhantomData), } impl PartialEq for Pointer { @@ -71,7 +70,6 @@ where Pointer::Values(vals) => vals.serialize(serializer), Pointer::Link { cid, .. } => cid.serialize(serializer), Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), - Pointer::Phantom(_) => unreachable!(), } } } From 5ee32a72eaa2a4a299d2c31b1da8d3300121414b Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Wed, 12 Jul 2023 13:51:07 +0000 Subject: [PATCH 06/16] port serialize impl for hamtv0 pointer type --- ipld/hamt/src/node.rs | 1 + ipld/hamt/src/pointer.rs | 46 ++++++++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index 179a2d45b..4ae5fc020 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -38,6 +38,7 @@ impl Serialize for Node where K: Serialize, V: Serialize, + Ver: self::Version, { fn serialize(&self, serializer: S) -> Result where diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index ad33bffaa..db04bb6cb 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -56,25 +56,59 @@ impl PartialEq for Pointer { } } +mod pointer_v0 { + use cid::Cid; + use serde::Serialize; + + use crate::KeyValuePair; + + use super::Pointer; + + #[derive(Serialize)] + #[serde(untagged)] + pub(super) enum PointerSer<'a, K, V> { + Vals(&'a [KeyValuePair]), + Link(&'a Cid), + } + + impl<'a, K, V, Ver, H> TryFrom<&'a Pointer> for PointerSer<'a, K, V> { + type Error = &'static str; + + fn try_from(pointer: &'a Pointer) -> Result { + match pointer { + Pointer::Values(vals) => Ok(PointerSer::Vals(vals.as_ref())), + Pointer::Link { cid, .. } => Ok(PointerSer::Link(cid)), + Pointer::Dirty(_) => Err("Cannot serialize cached values"), + } + } + } +} + /// Serialize the Pointer like an untagged enum. -impl Serialize for Pointer +impl Serialize for Pointer where K: Serialize, V: Serialize, + Ver: self::version::Version, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - match self { - Pointer::Values(vals) => vals.serialize(serializer), - Pointer::Link { cid, .. } => cid.serialize(serializer), - Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), + match Ver::NUMBER { + 0 => pointer_v0::PointerSer::try_from(self) + .map_err(ser::Error::custom)? + .serialize(serializer), + _ => match self { + Pointer::Values(vals) => vals.serialize(serializer), + Pointer::Link { cid, .. } => cid.serialize(serializer), + Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), + }, } } } -impl TryFrom for Pointer +impl TryFrom for Pointer where K: DeserializeOwned, V: DeserializeOwned, From 7f40a6ac90c9ad71d137e178a334789fb5d0b1fc Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Thu, 13 Jul 2023 13:55:03 +0000 Subject: [PATCH 07/16] add test for hamtv0 --- ipld/hamt/src/pointer.rs | 46 ++++++++++++++++++----------------- ipld/hamt/tests/hamt_tests.rs | 16 ++++++++++-- 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index db04bb6cb..b04cff5a0 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -58,7 +58,7 @@ impl PartialEq for Pointer { mod pointer_v0 { use cid::Cid; - use serde::Serialize; + use serde::{Deserialize, Serialize}; use crate::KeyValuePair; @@ -146,31 +146,33 @@ where { match Ver::NUMBER { 0 => { - let ipld = Ipld::deserialize(deserializer)?; - let (_key, value) = match ipld { - Ipld::Map(map) => map + let ipld = Ipld::deserialize(deserializer); + if let Ok(Ipld::Map(map)) = ipld { + let (_key, value) = map .into_iter() .next() - .ok_or("Expected at least one element".to_string()), - other => Err(format!("Expected `Ipld::Map`, got {:#?}", other)), - } - .map_err(de::Error::custom)?; - match value { - ipld_list @ Ipld::List(_) => { - let values: Vec> = - Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?; - Ok(Self::Values(values)) + .ok_or("Expected at least one element in Ipld::Map".to_string()) + .map_err(de::Error::custom)?; + + match value { + ipld_list @ Ipld::List(_) => { + let values: Vec> = + Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?; + Ok(Self::Values(values)) + } + Ipld::Link(cid) => Ok(Self::Link { + cid, + cache: Default::default(), + }), + other => Err(format!( + "Expected `Ipld::List` or `Ipld::Link`, got {:#?}", + other + )), } - Ipld::Link(cid) => Ok(Self::Link { - cid, - cache: Default::default(), - }), - other => Err(format!( - "Expected `Ipld::List` or `Ipld::Link`, got {:#?}", - other - )), + .map_err(de::Error::custom) + } else { + ipld.and_then(|ipld| ipld.try_into().map_err(de::Error::custom)) } - .map_err(de::Error::custom) } _ => Ipld::deserialize(deserializer) .and_then(|ipld| ipld.try_into().map_err(de::Error::custom)), diff --git a/ipld/hamt/tests/hamt_tests.rs b/ipld/hamt/tests/hamt_tests.rs index 43b84eaa6..a0904e278 100644 --- a/ipld/hamt/tests/hamt_tests.rs +++ b/ipld/hamt/tests/hamt_tests.rs @@ -13,7 +13,7 @@ use fvm_ipld_encoding::strict_bytes::ByteBuf; use fvm_ipld_encoding::CborStore; #[cfg(feature = "identity")] use fvm_ipld_hamt::Identity; -use fvm_ipld_hamt::{BytesKey, Config, Error, Hamt, Hash}; +use fvm_ipld_hamt::{BytesKey, Config, Error, Hamt, Hamtv0, Hash}; use multihash::Code; use quickcheck::Arbitrary; use rand::seq::SliceRandom; @@ -893,7 +893,8 @@ fn tstring(v: impl Display) -> BytesKey { } mod test_default { - use fvm_ipld_blockstore::tracking::BSStats; + use fvm_ipld_blockstore::{tracking::BSStats, MemoryBlockstore}; + use fvm_ipld_hamt::{Config, Hamtv0}; use quickcheck_macros::quickcheck; use crate::{CidChecker, HamtFactory, LimitedKeyOps, UniqueKeyValuePairs}; @@ -1007,6 +1008,17 @@ mod test_default { super::clean_child_ordering(HamtFactory::default(), Some(stats), cids); } + #[test] + fn test_hamtv0() { + let store = MemoryBlockstore::default(); + let mut hamtv0: Hamtv0<_, _, usize> = Hamtv0::new_with_config(&store, Config::default()); + hamtv0.set(1, "world".to_string()).unwrap(); + assert_eq!(hamtv0.get(&1).unwrap(), Some(&"world".to_string())); + let c = hamtv0.flush().unwrap(); + let new_hamt = Hamtv0::load_with_config(&c, &store, Config::default()).unwrap(); + assert_eq!(hamtv0, new_hamt); + } + #[quickcheck] fn prop_cid_indep_of_insert_order(kvs: UniqueKeyValuePairs, seed: u64) -> bool { super::prop_cid_indep_of_insert_order(HamtFactory::default(), kvs, seed) From 06bebb31fbad6f52e6a9dcdba429b18b77a5f137 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Thu, 13 Jul 2023 14:53:54 +0000 Subject: [PATCH 08/16] add default bit width for hamt v0 --- ipld/hamt/src/lib.rs | 2 ++ ipld/hamt/src/pointer.rs | 2 +- ipld/hamt/tests/hamt_tests.rs | 10 +++++++--- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ipld/hamt/src/lib.rs b/ipld/hamt/src/lib.rs index 144f58d00..467ef1012 100644 --- a/ipld/hamt/src/lib.rs +++ b/ipld/hamt/src/lib.rs @@ -30,6 +30,8 @@ pub use self::pointer::version; /// Default bit width for indexing a hash at each depth level const DEFAULT_BIT_WIDTH: u32 = 8; +/// Default bit width for indexing a hash at each depth level for Hamt v0 +pub const DEFAULT_BIT_WIDTH_V0: u32 = 5; /// Configuration options for a HAMT instance. #[derive(Debug, Clone)] diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index b04cff5a0..7fca858d0 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -58,7 +58,7 @@ impl PartialEq for Pointer { mod pointer_v0 { use cid::Cid; - use serde::{Deserialize, Serialize}; + use serde::Serialize; use crate::KeyValuePair; diff --git a/ipld/hamt/tests/hamt_tests.rs b/ipld/hamt/tests/hamt_tests.rs index a0904e278..ba8b8fa53 100644 --- a/ipld/hamt/tests/hamt_tests.rs +++ b/ipld/hamt/tests/hamt_tests.rs @@ -894,7 +894,7 @@ fn tstring(v: impl Display) -> BytesKey { mod test_default { use fvm_ipld_blockstore::{tracking::BSStats, MemoryBlockstore}; - use fvm_ipld_hamt::{Config, Hamtv0}; + use fvm_ipld_hamt::{Config, Hamtv0, DEFAULT_BIT_WIDTH_V0}; use quickcheck_macros::quickcheck; use crate::{CidChecker, HamtFactory, LimitedKeyOps, UniqueKeyValuePairs}; @@ -1010,12 +1010,16 @@ mod test_default { #[test] fn test_hamtv0() { + let config = Config { + bit_width: DEFAULT_BIT_WIDTH_V0, + ..Default::default() + }; let store = MemoryBlockstore::default(); - let mut hamtv0: Hamtv0<_, _, usize> = Hamtv0::new_with_config(&store, Config::default()); + let mut hamtv0: Hamtv0<_, _, usize> = Hamtv0::new_with_config(&store, config.clone()); hamtv0.set(1, "world".to_string()).unwrap(); assert_eq!(hamtv0.get(&1).unwrap(), Some(&"world".to_string())); let c = hamtv0.flush().unwrap(); - let new_hamt = Hamtv0::load_with_config(&c, &store, Config::default()).unwrap(); + let new_hamt = Hamtv0::load_with_config(&c, &store, config).unwrap(); assert_eq!(hamtv0, new_hamt); } From bc2b6b685ca1e3fd08f3fc926e9def9c96a0c0da Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Fri, 14 Jul 2023 11:21:39 +0000 Subject: [PATCH 09/16] remove serde untagged --- ipld/hamt/src/pointer.rs | 48 ++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index 7fca858d0..2338b7012 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -65,7 +65,6 @@ mod pointer_v0 { use super::Pointer; #[derive(Serialize)] - #[serde(untagged)] pub(super) enum PointerSer<'a, K, V> { Vals(&'a [KeyValuePair]), Link(&'a Cid), @@ -146,33 +145,34 @@ where { match Ver::NUMBER { 0 => { - let ipld = Ipld::deserialize(deserializer); - if let Ok(Ipld::Map(map)) = ipld { - let (_key, value) = map + let ipld = Ipld::deserialize(deserializer)?; + let (_key, value) = match ipld { + Ipld::Map(map) => map .into_iter() .next() - .ok_or("Expected at least one element in Ipld::Map".to_string()) - .map_err(de::Error::custom)?; - - match value { - ipld_list @ Ipld::List(_) => { - let values: Vec> = - Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?; - Ok(Self::Values(values)) - } - Ipld::Link(cid) => Ok(Self::Link { - cid, - cache: Default::default(), - }), - other => Err(format!( - "Expected `Ipld::List` or `Ipld::Link`, got {:#?}", - other - )), + .ok_or("Expected at least one element".to_string()), + other => Err(format!("Expected `Ipld::Map`, got {:#?}", other)), + } + .map_err(de::Error::custom)?; + match value { + ipld_list @ Ipld::List(_) => { + let values: Vec> = + Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?; + Ok(Self::Values(values)) } - .map_err(de::Error::custom) - } else { - ipld.and_then(|ipld| ipld.try_into().map_err(de::Error::custom)) + Ipld::Link(cid) => Ok(Self::Link { + cid, + cache: Default::default(), + }), + other => Err(format!( + "Expected `Ipld::List` or `Ipld::Link`, got {:#?}", + other + )), } + .map_err(de::Error::custom) + // } else { + // ipld.and_then(|ipld| ipld.try_into().map_err(de::Error::custom)) + // } } _ => Ipld::deserialize(deserializer) .and_then(|ipld| ipld.try_into().map_err(de::Error::custom)), From f8189263fff7850df9540ff332724bb92eeb26d7 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Fri, 14 Jul 2023 11:30:13 +0000 Subject: [PATCH 10/16] clippy --- ipld/hamt/tests/hamt_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipld/hamt/tests/hamt_tests.rs b/ipld/hamt/tests/hamt_tests.rs index ba8b8fa53..f2c40c73e 100644 --- a/ipld/hamt/tests/hamt_tests.rs +++ b/ipld/hamt/tests/hamt_tests.rs @@ -13,7 +13,7 @@ use fvm_ipld_encoding::strict_bytes::ByteBuf; use fvm_ipld_encoding::CborStore; #[cfg(feature = "identity")] use fvm_ipld_hamt::Identity; -use fvm_ipld_hamt::{BytesKey, Config, Error, Hamt, Hamtv0, Hash}; +use fvm_ipld_hamt::{BytesKey, Config, Error, Hamt, Hash}; use multihash::Code; use quickcheck::Arbitrary; use rand::seq::SliceRandom; From 7aeeee9ca9ca74c9c0987c8f468b6188adf86345 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Fri, 14 Jul 2023 12:49:22 +0000 Subject: [PATCH 11/16] rm commented code --- ipld/hamt/src/pointer.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index 2338b7012..626e66c97 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -170,9 +170,6 @@ where )), } .map_err(de::Error::custom) - // } else { - // ipld.and_then(|ipld| ipld.try_into().map_err(de::Error::custom)) - // } } _ => Ipld::deserialize(deserializer) .and_then(|ipld| ipld.try_into().map_err(de::Error::custom)), From bb3f0c1c273e90503ac8986c5a21a7e0ba74d82a Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Tue, 1 Aug 2023 04:52:01 +0000 Subject: [PATCH 12/16] address code review changes --- ipld/hamt/src/hamt.rs | 7 ++-- ipld/hamt/src/lib.rs | 3 +- ipld/hamt/src/pointer.rs | 79 +++++++++++++++++++++++++++------------- 3 files changed, 58 insertions(+), 31 deletions(-) diff --git a/ipld/hamt/src/hamt.rs b/ipld/hamt/src/hamt.rs index 2ae1348a8..8fbc5a586 100644 --- a/ipld/hamt/src/hamt.rs +++ b/ipld/hamt/src/hamt.rs @@ -16,7 +16,7 @@ use serde::{Serialize, Serializer}; use crate::hash_bits::HashBits; use crate::node::Node; use crate::pointer::version::Version; -use crate::{version, Config, Error, Hash, HashAlgorithm, Sha256}; +use crate::{pointer::version, Config, Error, Hash, HashAlgorithm, Sha256}; /// Implementation of the HAMT data structure for IPLD. /// @@ -34,11 +34,12 @@ use crate::{version, Config, Error, Hash, HashAlgorithm, Sha256}; /// assert_eq!(map.get::<_>(&1).unwrap(), None); /// let cid = map.flush().unwrap(); /// ``` -pub type Hamt = HamtImpl; +pub type Hamt = HamtImpl; /// Legacy amt V0 -pub type Hamtv0 = HamtImpl; +pub type Hamtv0 = HamtImpl; #[derive(Debug)] +#[doc(hidden)] pub struct HamtImpl { root: Node, store: BS, diff --git a/ipld/hamt/src/lib.rs b/ipld/hamt/src/lib.rs index 467ef1012..909b2898f 100644 --- a/ipld/hamt/src/lib.rs +++ b/ipld/hamt/src/lib.rs @@ -26,7 +26,6 @@ pub use self::error::Error; pub use self::hamt::{Hamt, Hamtv0}; pub use self::hash::*; pub use self::hash_algorithm::*; -pub use self::pointer::version; /// Default bit width for indexing a hash at each depth level const DEFAULT_BIT_WIDTH: u32 = 8; @@ -76,7 +75,7 @@ impl Default for Config { type HashedKey = [u8; 32]; #[derive(Debug, Serialize, Deserialize, PartialEq)] -struct KeyValuePair(K, V); +pub struct KeyValuePair(K, V); impl KeyValuePair { pub fn key(&self) -> &K { diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index 626e66c97..a4be84ab1 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -11,10 +11,13 @@ use once_cell::unsync::OnceCell; use serde::de::{self, DeserializeOwned}; use serde::{ser, Deserialize, Deserializer, Serialize, Serializer}; +use self::pointer_v0::PointerDe; + use super::node::Node; use super::{Error, Hash, HashAlgorithm, KeyValuePair}; use crate::Config; +#[doc(hidden)] pub mod version { #[derive(PartialEq, Eq, Debug)] pub struct V0; @@ -58,7 +61,7 @@ impl PartialEq for Pointer { mod pointer_v0 { use cid::Cid; - use serde::Serialize; + use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize}; use crate::KeyValuePair; @@ -66,10 +69,20 @@ mod pointer_v0 { #[derive(Serialize)] pub(super) enum PointerSer<'a, K, V> { + #[serde(rename = "0")] Vals(&'a [KeyValuePair]), + #[serde(rename = "1")] Link(&'a Cid), } + #[derive(Deserialize, Serialize)] + pub enum PointerDe { + #[serde(rename = "0")] + Link(Cid), + #[serde(rename = "1")] + Vals(Vec>), + } + impl<'a, K, V, Ver, H> TryFrom<&'a Pointer> for PointerSer<'a, K, V> { type Error = &'static str; @@ -81,6 +94,18 @@ mod pointer_v0 { } } } + + impl From> for Pointer { + fn from(pointer: PointerDe) -> Self { + match pointer { + PointerDe::Link(cid) => Pointer::Link { + cid, + cache: Default::default(), + }, + PointerDe::Vals(vals) => Pointer::Values(vals), + } + } + } } /// Serialize the Pointer like an untagged enum. @@ -145,31 +170,33 @@ where { match Ver::NUMBER { 0 => { - let ipld = Ipld::deserialize(deserializer)?; - let (_key, value) = match ipld { - Ipld::Map(map) => map - .into_iter() - .next() - .ok_or("Expected at least one element".to_string()), - other => Err(format!("Expected `Ipld::Map`, got {:#?}", other)), - } - .map_err(de::Error::custom)?; - match value { - ipld_list @ Ipld::List(_) => { - let values: Vec> = - Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?; - Ok(Self::Values(values)) - } - Ipld::Link(cid) => Ok(Self::Link { - cid, - cache: Default::default(), - }), - other => Err(format!( - "Expected `Ipld::List` or `Ipld::Link`, got {:#?}", - other - )), - } - .map_err(de::Error::custom) + let pointer_de: PointerDe = Deserialize::deserialize(deserializer)?; + Ok(Pointer::from(pointer_de)) + // let ipld = Ipld::deserialize(deserializer)?; + // let (_key, value) = match ipld { + // Ipld::Map(map) => map + // .into_iter() + // .next() + // .ok_or("Expected at least one element".to_string()), + // other => Err(format!("Expected `Ipld::Map`, got {:#?}", other)), + // } + // .map_err(de::Error::custom)?; + // match value { + // ipld_list @ Ipld::List(_) => { + // let values: Vec> = + // Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?; + // Ok(Self::Values(values)) + // } + // Ipld::Link(cid) => Ok(Self::Link { + // cid, + // cache: Default::default(), + // }), + // other => Err(format!( + // "Expected `Ipld::List` or `Ipld::Link`, got {:#?}", + // other + // )), + // } + // .map_err(de::Error::custom) } _ => Ipld::deserialize(deserializer) .and_then(|ipld| ipld.try_into().map_err(de::Error::custom)), From b831c43b2b2895c27938704190d8ef3dbb222d7c Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Tue, 1 Aug 2023 05:14:27 +0000 Subject: [PATCH 13/16] lint --- ipld/hamt/src/pointer.rs | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index a4be84ab1..6c61b367f 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -61,7 +61,7 @@ impl PartialEq for Pointer { mod pointer_v0 { use cid::Cid; - use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize}; + use serde::{Deserialize, Serialize}; use crate::KeyValuePair; @@ -172,31 +172,6 @@ where 0 => { let pointer_de: PointerDe = Deserialize::deserialize(deserializer)?; Ok(Pointer::from(pointer_de)) - // let ipld = Ipld::deserialize(deserializer)?; - // let (_key, value) = match ipld { - // Ipld::Map(map) => map - // .into_iter() - // .next() - // .ok_or("Expected at least one element".to_string()), - // other => Err(format!("Expected `Ipld::Map`, got {:#?}", other)), - // } - // .map_err(de::Error::custom)?; - // match value { - // ipld_list @ Ipld::List(_) => { - // let values: Vec> = - // Deserialize::deserialize(ipld_list).map_err(de::Error::custom)?; - // Ok(Self::Values(values)) - // } - // Ipld::Link(cid) => Ok(Self::Link { - // cid, - // cache: Default::default(), - // }), - // other => Err(format!( - // "Expected `Ipld::List` or `Ipld::Link`, got {:#?}", - // other - // )), - // } - // .map_err(de::Error::custom) } _ => Ipld::deserialize(deserializer) .and_then(|ipld| ipld.try_into().map_err(de::Error::custom)), From 713099afd845e1fb366ec672af4811fa6cb95b31 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 2 Aug 2023 16:02:21 -0700 Subject: [PATCH 14/16] fix pointer-v0 decoding Well, that was an sneaky bug. --- ipld/hamt/src/pointer.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index 6c61b367f..64f1bb126 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -11,8 +11,6 @@ use once_cell::unsync::OnceCell; use serde::de::{self, DeserializeOwned}; use serde::{ser, Deserialize, Deserializer, Serialize, Serializer}; -use self::pointer_v0::PointerDe; - use super::node::Node; use super::{Error, Hash, HashAlgorithm, KeyValuePair}; use crate::Config; @@ -70,9 +68,9 @@ mod pointer_v0 { #[derive(Serialize)] pub(super) enum PointerSer<'a, K, V> { #[serde(rename = "0")] - Vals(&'a [KeyValuePair]), - #[serde(rename = "1")] Link(&'a Cid), + #[serde(rename = "1")] + Vals(&'a [KeyValuePair]), } #[derive(Deserialize, Serialize)] @@ -170,7 +168,8 @@ where { match Ver::NUMBER { 0 => { - let pointer_de: PointerDe = Deserialize::deserialize(deserializer)?; + let pointer_de: pointer_v0::PointerDe = + Deserialize::deserialize(deserializer)?; Ok(Pointer::from(pointer_de)) } _ => Ipld::deserialize(deserializer) From 782567477bfdafe1c2f0a2491d6f502197220f2b Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 2 Aug 2023 16:20:45 -0700 Subject: [PATCH 15/16] nit: remove DEFAULT_BIT_WIDTH_V0 The user needs to pass the bit width based on the context. This constant isn't actually used anywhere except tests anyways (which, IMO, is confusing as I'd expect it to be used by-default for v0 hamts if specified like that). --- ipld/hamt/src/lib.rs | 2 -- ipld/hamt/tests/hamt_tests.rs | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ipld/hamt/src/lib.rs b/ipld/hamt/src/lib.rs index 909b2898f..471e71a80 100644 --- a/ipld/hamt/src/lib.rs +++ b/ipld/hamt/src/lib.rs @@ -29,8 +29,6 @@ pub use self::hash_algorithm::*; /// Default bit width for indexing a hash at each depth level const DEFAULT_BIT_WIDTH: u32 = 8; -/// Default bit width for indexing a hash at each depth level for Hamt v0 -pub const DEFAULT_BIT_WIDTH_V0: u32 = 5; /// Configuration options for a HAMT instance. #[derive(Debug, Clone)] diff --git a/ipld/hamt/tests/hamt_tests.rs b/ipld/hamt/tests/hamt_tests.rs index f2c40c73e..6bb40dcfc 100644 --- a/ipld/hamt/tests/hamt_tests.rs +++ b/ipld/hamt/tests/hamt_tests.rs @@ -894,7 +894,7 @@ fn tstring(v: impl Display) -> BytesKey { mod test_default { use fvm_ipld_blockstore::{tracking::BSStats, MemoryBlockstore}; - use fvm_ipld_hamt::{Config, Hamtv0, DEFAULT_BIT_WIDTH_V0}; + use fvm_ipld_hamt::{Config, Hamtv0}; use quickcheck_macros::quickcheck; use crate::{CidChecker, HamtFactory, LimitedKeyOps, UniqueKeyValuePairs}; @@ -1011,7 +1011,7 @@ mod test_default { #[test] fn test_hamtv0() { let config = Config { - bit_width: DEFAULT_BIT_WIDTH_V0, + bit_width: 5, ..Default::default() }; let store = MemoryBlockstore::default(); From a34fcf6b2b1626797698f96cf95fcde1f9de7a61 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 2 Aug 2023 16:24:21 -0700 Subject: [PATCH 16/16] nit: make keyvaluepair private --- ipld/hamt/src/lib.rs | 2 +- ipld/hamt/src/pointer.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ipld/hamt/src/lib.rs b/ipld/hamt/src/lib.rs index 471e71a80..2bb6567c5 100644 --- a/ipld/hamt/src/lib.rs +++ b/ipld/hamt/src/lib.rs @@ -73,7 +73,7 @@ impl Default for Config { type HashedKey = [u8; 32]; #[derive(Debug, Serialize, Deserialize, PartialEq)] -pub struct KeyValuePair(K, V); +struct KeyValuePair(K, V); impl KeyValuePair { pub fn key(&self) -> &K { diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index 64f1bb126..bce841c5a 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -74,7 +74,7 @@ mod pointer_v0 { } #[derive(Deserialize, Serialize)] - pub enum PointerDe { + pub(super) enum PointerDe { #[serde(rename = "0")] Link(Cid), #[serde(rename = "1")]