Skip to content

Commit

Permalink
Suggestions for storage value decoding (#1457)
Browse files Browse the repository at this point in the history
* Storage decode tweaks

* doc tweak

* more precise error when leftover or not enough bytes
  • Loading branch information
jsdw authored Feb 28, 2024
1 parent 4bfaa4c commit bd01aab
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 147 deletions.
6 changes: 3 additions & 3 deletions codegen/src/api/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ fn generate_storage_entry_fns(
0 => (quote!(()), quote!(())),
1 => {
let key = &keys_slice[0];
if key.hasher.hash_contains_unhashed_key() {
if key.hasher.ends_with_key() {
let arg = &key.arg_name;
let keys = quote!(#static_storage_key::new(#arg.borrow()));
let path = &key.alias_type_path;
Expand All @@ -216,7 +216,7 @@ fn generate_storage_entry_fns(
|MapEntryKey {
arg_name, hasher, ..
}| {
if hasher.hash_contains_unhashed_key() {
if hasher.ends_with_key() {
quote!( #static_storage_key::new(#arg_name.borrow()) )
} else {
quote!(())
Expand All @@ -230,7 +230,7 @@ fn generate_storage_entry_fns(
hasher,
..
}| {
if hasher.hash_contains_unhashed_key() {
if hasher.ends_with_key() {
quote!( #static_storage_key<#alias_type_path> )
} else {
quote!(())
Expand Down
18 changes: 8 additions & 10 deletions metadata/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,14 +476,14 @@ pub enum StorageHasher {
}

impl StorageHasher {
/// The hash produced by a [`StorageHasher`] has 1 or 2 of the following components:
/// 1. A fixed size hash. (not present for [`StorageHasher::Identity`])
/// 2. The key that was used as an input to the hasher. (only present for
/// The hash produced by a [`StorageHasher`] can have these two components, in order:
///
/// 1. A fixed size hash. (not present for [`StorageHasher::Identity`]).
/// 2. The SCALE encoded key that was used as an input to the hasher (only present for
/// [`StorageHasher::Twox64Concat`], [`StorageHasher::Blake2_128Concat`] or [`StorageHasher::Identity`]).
///
/// This function returns the number of hash bytes that must be skipped to get to the 2. component, the unhashed key.
/// To check whether or not an unhashed key is contained, see [`StorageHasher::hash_bytes_before_unhashed_key`].
pub fn hash_bytes_before_unhashed_key(&self) -> usize {
/// This function returns the number of bytes used to represent the first of these.
pub fn len_excluding_key(&self) -> usize {
match self {
StorageHasher::Blake2_128Concat => 16,
StorageHasher::Twox64Concat => 8,
Expand All @@ -495,10 +495,8 @@ impl StorageHasher {
}
}

/// Returns if the key a hash was produces for, is contained in the hash itself.
/// If this is `true`, [`StorageHasher::hash_bytes_before_unhashed_key`] can be used,
/// to find the offset of the start byte of the key from the start of the hash bytes.
pub fn hash_contains_unhashed_key(&self) -> bool {
/// Returns true if the key used to produce the hash is appended to the hash itself.
pub fn ends_with_key(&self) -> bool {
matches!(
self,
StorageHasher::Blake2_128Concat | StorageHasher::Twox64Concat | StorageHasher::Identity
Expand Down
12 changes: 8 additions & 4 deletions subxt/src/error/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,16 +207,20 @@ pub enum StorageAddressError {
/// The number of fields in the metadata for this storage entry.
fields: usize,
},
/// We weren't given enough bytes to decode the storage address/key.
#[error("Not enough remaining bytes to decode the storage address/key")]
NotEnoughBytes,
/// We have leftover bytes after decoding the storage address.
#[error("We have leftover bytes after decoding the storage address")]
TooManyBytes,
/// The bytes of a storage address are not the expected address for decoding the storage keys of the address.
#[error("Storage address bytes are not the expected format. Addresses need to be at least 16 bytes (pallet ++ entry) and follow a structure given by the hashers defined in the metadata.")]
#[error("Storage address bytes are not the expected format. Addresses need to be at least 16 bytes (pallet ++ entry) and follow a structure given by the hashers defined in the metadata")]
UnexpectedAddressBytes,
/// An invalid hasher was used to reconstruct a value from a chunk of bytes that is part of a storage address. Hashers where the hash does not contain the original value are invalid for this purpose.
#[error("An invalid hasher was used to reconstruct a value of type {ty_name} (id={ty_id}) from a hash formed by a {hasher:?} hasher. This is only possible for concat-style hashers or the identity hasher")]
#[error("An invalid hasher was used to reconstruct a value with type ID {ty_id} from a hash formed by a {hasher:?} hasher. This is only possible for concat-style hashers or the identity hasher")]
HasherCannotReconstructKey {
/// Type id of the key's type.
ty_id: u32,
/// Type name of the key's type.
ty_name: String,
/// The invalid hasher that caused this error.
hasher: StorageHasher,
},
Expand Down
3 changes: 1 addition & 2 deletions subxt/src/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ mod storage_address;
mod storage_client;
mod storage_key;
mod storage_type;

pub mod utils;
mod utils;

pub use storage_client::StorageClient;

Expand Down
197 changes: 109 additions & 88 deletions subxt/src/storage/storage_key.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use super::utils::strip_storage_hash_bytes;
use crate::{
dynamic::DecodedValueThunk,
error::{Error, StorageAddressError},
metadata::{DecodeWithMetadata, Metadata},
metadata::Metadata,
utils::{Encoded, Static},
};
use scale_decode::{visitor::IgnoreVisitor, DecodeAsType};
use scale_encode::EncodeAsType;
use scale_info::PortableRegistry;
use scale_value::Value;
use subxt_metadata::StorageHasher;

use derivative::Derivative;
Expand All @@ -21,25 +22,29 @@ pub trait StorageKey {
// But that plays poorly with the `Flatten` and `Chain` structs.
fn keys_len(&self) -> usize;

/// Attempts to decode the StorageKey from a storage address that has been stripped of its root bytes.
/// Attempts to decode the StorageKey given some bytes and a set of hashers and type IDs that they are meant to represent.
///
/// Example: Imagine The `StorageKey` is a tuple (A,B) and the hashers are: [Blake2_128Concat, Twox64Concat].
/// Example: Imagine The `StorageKey` is a tuple `(A,B)` and the hashers are `[Blake2_128Concat, Twox64Concat]`.
/// Then the memory layout of the storage address is:
///
/// ```txt
/// | 8 bytes pallet hash | 8 bytes entry hash | 16 bytes hash of A | ... bytes of A | 8 bytes hash of B | ... bytes of B |
/// | 16 byte hash of A | n bytes for SCALE encoded A | 8 byte hash of B | n bytes for SCALE encoded B |
/// ```
/// `cursor` should point into a region after those first 16 bytes, at the start of a new hash.
/// `hashers_and_ty_ids` should consume all the hashers that have been used for decoding, such that there are less hashers coming to the next key.
///
/// Implementations of this must advance the `bytes` and `hashers_and_ty_ids` cursors to consume any that they are using, or
/// return an error if they cannot appropriately do so. When a tuple of such implementations is given, each implementation
/// in the tuple receives the remaining un-consumed bytes and hashers from the previous ones.
fn decode_from_bytes(
cursor: &mut &[u8],
bytes: &mut &[u8],
hashers_and_ty_ids: &mut &[(StorageHasher, u32)],
metadata: &Metadata,
) -> Result<Self, Error>
where
Self: Sized + 'static;
}

/// Implement `StorageKey` for `()` which can be used for keyless storage entries.
/// Implement `StorageKey` for `()` which can be used for keyless storage entries,
/// or to otherwise just ignore some entry.
impl StorageKey for () {
fn keys_iter(&self) -> impl Iterator<Item = &dyn EncodeAsType> {
std::iter::empty()
Expand All @@ -50,18 +55,24 @@ impl StorageKey for () {
}

fn decode_from_bytes(
_cursor: &mut &[u8],
bytes: &mut &[u8],
hashers_and_ty_ids: &mut &[(StorageHasher, u32)],
_metadata: &Metadata,
metadata: &Metadata,
) -> Result<Self, Error> {
if hashers_and_ty_ids.is_empty() {
return Err(StorageAddressError::WrongNumberOfHashers {
hashers: 0,
fields: 1,
// If no hashers, we just do nothing (erroring if any bytes are left over).
let Some((hasher, ty_id)) = hashers_and_ty_ids.first() else {
if bytes.is_empty() {
return Ok(());
} else {
return Err(StorageAddressError::TooManyBytes.into());
}
.into());
}
*hashers_and_ty_ids = &hashers_and_ty_ids[1..]; // Advance cursor by 1
};

// Consume the hash bytes (we don't care about the key output here).
consume_hash_returning_key_bytes(bytes, *hasher, *ty_id, metadata.types())?;
// Advance our hasher cursor as well now that we've used it.
*hashers_and_ty_ids = &hashers_and_ty_ids[1..];

Ok(())
}
}
Expand All @@ -71,8 +82,8 @@ impl StorageKey for () {
#[derive(Derivative)]
#[derivative(Clone(bound = ""), Debug(bound = ""))]
pub struct StaticStorageKey<K: ?Sized> {
pub(super) bytes: Static<Encoded>,
pub(super) _marker: std::marker::PhantomData<K>,
bytes: Static<Encoded>,
_marker: std::marker::PhantomData<K>,
}

impl<K: codec::Encode + ?Sized> StaticStorageKey<K> {
Expand Down Expand Up @@ -111,7 +122,7 @@ impl<K: ?Sized> StorageKey for StaticStorageKey<K> {
}

fn decode_from_bytes(
cursor: &mut &[u8],
bytes: &mut &[u8],
hashers_and_ty_ids: &mut &[(StorageHasher, u32)],
metadata: &Metadata,
) -> Result<Self, Error>
Expand All @@ -125,53 +136,28 @@ impl<K: ?Sized> StorageKey for StaticStorageKey<K> {
}
.into());
};
*hashers_and_ty_ids = &hashers_and_ty_ids[1..]; // Advance cursor by 1
decode_storage_key_from_hash(cursor, hasher, *ty_id, metadata)
}
}

pub fn decode_storage_key_from_hash<K: ?Sized>(
cursor: &mut &[u8],
hasher: &StorageHasher,
ty_id: u32,
metadata: &Metadata,
) -> Result<StaticStorageKey<K>, Error> {
strip_storage_hash_bytes(cursor, hasher)?;

let bytes = *cursor;
if let Err(err) = scale_decode::visitor::decode_with_visitor(
cursor,
ty_id,
metadata.types(),
scale_decode::visitor::IgnoreVisitor,
) {
return Err(scale_decode::Error::from(err).into());
// Advance the bytes cursor, returning any key bytes.
let key_bytes = consume_hash_returning_key_bytes(bytes, *hasher, *ty_id, metadata.types())?;
// Advance the hasher cursor now we've used it.
*hashers_and_ty_ids = &hashers_and_ty_ids[1..];

// if the hasher had no key appended, we can't decode it into a `StaticStorageKey`.
let Some(key_bytes) = key_bytes else {
return Err(StorageAddressError::HasherCannotReconstructKey {
ty_id: *ty_id,
hasher: *hasher,
}
.into());
};

// Return the key bytes.
let key = StaticStorageKey {
bytes: Static(Encoded(key_bytes.to_vec())),
_marker: std::marker::PhantomData::<K>,
};
Ok(key)
}
let bytes_decoded = bytes.len() - cursor.len();

// Note: This validation check makes sure, only zero-sized types can be decoded from
// hashers that do not support reconstruction of a value
if !hasher.hash_contains_unhashed_key() && bytes_decoded > 0 {
let ty_name = metadata
.types()
.resolve(ty_id)
.expect("ty_id is in metadata, because decode_with_visitor did not fail above; qed")
.path
.to_string();
return Err(StorageAddressError::HasherCannotReconstructKey {
ty_id,
ty_name,
hasher: *hasher,
}
.into());
};

let key_bytes = bytes[..bytes_decoded].to_vec();
let key = StaticStorageKey {
bytes: Static(Encoded(key_bytes)),
_marker: std::marker::PhantomData::<K>,
};
Ok(key)
}

impl StorageKey for Vec<scale_value::Value> {
Expand All @@ -186,32 +172,67 @@ impl StorageKey for Vec<scale_value::Value> {
}

fn decode_from_bytes(
cursor: &mut &[u8],
bytes: &mut &[u8],
hashers_and_ty_ids: &mut &[(StorageHasher, u32)],
metadata: &Metadata,
) -> Result<Self, Error>
where
Self: Sized + 'static,
{
let mut hashers_and_ty_ids_iter = hashers_and_ty_ids.iter();
let mut result: Vec<scale_value::Value> = vec![];
let mut n = 0;
while !cursor.is_empty() {
let Some((hasher, ty_id)) = hashers_and_ty_ids_iter.next() else {
// Still bytes left, but no hashers and type ids anymore to pull from: this is an unexpected error.
return Err(StorageAddressError::UnexpectedAddressBytes.into());
};
strip_storage_hash_bytes(cursor, hasher)?;
let decoded = DecodedValueThunk::decode_with_metadata(cursor, *ty_id, metadata)?;
let value = decoded.to_value()?;
result.push(value.remove_context());
n += 1;
for (hasher, ty_id) in hashers_and_ty_ids.iter() {
match consume_hash_returning_key_bytes(bytes, *hasher, *ty_id, metadata.types())? {
Some(value_bytes) => {
let value =
Value::decode_as_type(&mut &*value_bytes, *ty_id, metadata.types())?;
result.push(value.remove_context());
}
None => {
result.push(Value::unnamed_composite([]));
}
}
*hashers_and_ty_ids = &hashers_and_ty_ids[1..]; // Advance by 1 each time.
}

// We've consumed all of the hashers, so we expect to also consume all of the bytes:
if !bytes.is_empty() {
return Err(StorageAddressError::TooManyBytes.into());
}
*hashers_and_ty_ids = &hashers_and_ty_ids[n..]; // Advance cursor by n

Ok(result)
}
}

/// Skip over the hash bytes produced by `hasher` (including any SCALE encoded
/// key appended to the end), advancing the `bytes` cursor past everything that
/// was consumed.
///
/// Returns `Ok(Some(key_bytes))` — the SCALE encoded key — for concat-style or
/// identity hashers which append the key to the hash, or `Ok(None)` for hashers
/// which do not embed the key. Errors if fewer than `hasher.len_excluding_key()`
/// bytes are available, or if the key fails to decode as type `ty_id`.
fn consume_hash_returning_key_bytes<'a>(
    bytes: &mut &'a [u8],
    hasher: StorageHasher,
    ty_id: u32,
    types: &PortableRegistry,
) -> Result<Option<&'a [u8]>, Error> {
    // Strip the bytes off for the actual hash, consuming them.
    let bytes_to_strip = hasher.len_excluding_key();
    if bytes.len() < bytes_to_strip {
        return Err(StorageAddressError::NotEnoughBytes.into());
    }
    *bytes = &bytes[bytes_to_strip..];

    // Now, find the bytes representing the key, consuming them.
    let before_key = *bytes;
    if hasher.ends_with_key() {
        // Decode (and discard) the key to learn how many bytes it occupies;
        // this advances `bytes` past the key as a side effect.
        scale_decode::visitor::decode_with_visitor(bytes, ty_id, types, IgnoreVisitor)
            .map_err(|err| Error::Decode(err.into()))?;

        // Fix: the key is the *front* of `before_key` (the bytes just decoded),
        // i.e. `[..consumed]`; slicing `[consumed..]` would wrongly return the
        // un-consumed tail that follows the key instead of the key itself.
        let key_bytes = &before_key[..before_key.len() - bytes.len()];
        Ok(Some(key_bytes))
    } else {
        // There are no key bytes, so return None.
        Ok(None)
    }
}

/// Generates StorageKey implementations for tuples, e.g.
/// ```rs,norun
/// impl<A: EncodeAsType, B: EncodeAsType> StorageKey for (StorageKey<A>, StorageKey<B>) {
Expand Down Expand Up @@ -278,11 +299,11 @@ macro_rules! impl_tuples {

#[rustfmt::skip]
const _: () = {
impl_tuples!(A iter_a 0, B iter_ab 1);
impl_tuples!(A iter_a 0, B iter_ab 1, C iter_c 2);
impl_tuples!(A iter_a 0, B iter_ab 1, C iter_c 2, D iter_d 3);
impl_tuples!(A iter_a 0, B iter_ab 1, C iter_c 2, D iter_d 3, E iter_e 4);
impl_tuples!(A iter_a 0, B iter_ab 1, C iter_c 2, D iter_d 3, E iter_e 4, F iter_f 5);
impl_tuples!(A iter_a 0, B iter_ab 1, C iter_c 2, D iter_d 3, E iter_e 4, F iter_f 5, G iter_g 6);
impl_tuples!(A iter_a 0, B iter_ab 1, C iter_c 2, D iter_d 3, E iter_e 4, F iter_f 5, G iter_g 6, H iter_h 7);
impl_tuples!(A iter_a 0, B iter_b 1);
impl_tuples!(A iter_a 0, B iter_b 1, C iter_c 2);
impl_tuples!(A iter_a 0, B iter_b 1, C iter_c 2, D iter_d 3);
impl_tuples!(A iter_a 0, B iter_b 1, C iter_c 2, D iter_d 3, E iter_e 4);
impl_tuples!(A iter_a 0, B iter_b 1, C iter_c 2, D iter_d 3, E iter_e 4, F iter_f 5);
impl_tuples!(A iter_a 0, B iter_b 1, C iter_c 2, D iter_d 3, E iter_e 4, F iter_f 5, G iter_g 6);
impl_tuples!(A iter_a 0, B iter_b 1, C iter_c 2, D iter_d 3, E iter_e 4, F iter_f 5, G iter_g 6, H iter_h 7);
};
Loading

0 comments on commit bd01aab

Please sign in to comment.