From c9a3b99f6cf9cddcef9189934984be10be493acb Mon Sep 17 00:00:00 2001 From: Wondertan Date: Tue, 4 Oct 2022 15:55:59 +0200 Subject: [PATCH] fix(ipld/plugin): don't truncate a type byte when it's not in the data The code has a special case for the share data with type byte and without. However, it truncates the type byte like it is always there. The #1139 is the first user of share data without a type byte, which uncovered the bug. The type byte causes issues for us again. refactor(ipld/plugin): extract namespaceHasher into a separate file with some additional cleanups In preparation for the upcoming test for the namespaceHasher, we extract it into a separate file. Also, some additional cosmetics were done reducing the API surfuce of the pkg, hiding things that are not supposed to be used outside. test(ipld/plugin): write dummy unit test for the namespaceHasher docs(ipld/plugin): godocs for the namespaceHasher perf(ipld/plugin): use sha256-simd to gain 40% speedup for hash computations --- go.mod | 2 +- ipld/plugin/namespace_hasher.go | 76 ++++++++++++++++++++++++++++ ipld/plugin/namespace_hasher_test.go | 67 ++++++++++++++++++++++++ ipld/plugin/nmt.go | 73 +++++--------------------- 4 files changed, 157 insertions(+), 61 deletions(-) create mode 100644 ipld/plugin/namespace_hasher.go create mode 100644 ipld/plugin/namespace_hasher_test.go diff --git a/go.mod b/go.mod index 9480df8a50..27f736b41c 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( github.com/libp2p/go-libp2p-pubsub v0.7.0 github.com/libp2p/go-libp2p-record v0.1.3 github.com/libp2p/go-libp2p-routing-helpers v0.2.3 + github.com/minio/sha256-simd v1.0.0 github.com/mitchellh/go-homedir v1.1.0 github.com/multiformats/go-base32 v0.1.0 github.com/multiformats/go-multiaddr v0.7.0 @@ -202,7 +203,6 @@ require ( github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect github.com/mimoo/StrobeGo v0.0.0-20181016162300-f8f6d4d2b643 // indirect github.com/minio/highwayhash v1.0.2 // indirect - github.com/minio/sha256-simd v1.0.0 // indirect github.com/mitchellh/go-testing-interface v1.0.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/mr-tron/base58 v1.2.0 // indirect diff --git a/ipld/plugin/namespace_hasher.go b/ipld/plugin/namespace_hasher.go new file mode 100644 index 0000000000..3a307e1bfe --- /dev/null +++ b/ipld/plugin/namespace_hasher.go @@ -0,0 +1,76 @@ +package plugin + +import ( + "fmt" + "hash" + + "github.com/minio/sha256-simd" + mhcore "github.com/multiformats/go-multihash/core" + "github.com/tendermint/tendermint/pkg/consts" + + "github.com/celestiaorg/nmt" +) + +func init() { + // Register custom hasher in the multihash register. + // Required for the Bitswap to hash and verify inbound data correctly + mhcore.Register(sha256Namespace8Flagged, func() hash.Hash { + return defaultHasher() + }) +} + +// namespaceHasher implements hash.Hash over NMT Hasher. +// TODO: Move to NMT repo? +type namespaceHasher struct { + *nmt.Hasher + tp byte + data []byte +} + +// defaultHasher constructs the namespaceHasher with default configuration +func defaultHasher() *namespaceHasher { + return &namespaceHasher{Hasher: nmt.NewNmtHasher(sha256.New(), nmt.DefaultNamespaceIDLen, true)} +} + +// Write writes the namespaced data to be hashed. +// +// Requires data of fixed size to match leaf or inner NMT nodes. +// Only one write is allowed. +func (n *namespaceHasher) Write(data []byte) (int, error) { + if n.data != nil { + return 0, fmt.Errorf("ipld: only one write to hasher is allowed") + } + + ln, nln, hln := len(data), int(n.NamespaceLen), n.Hash.Size() + innerNodeSize, leafNodeSize := (nln*2+hln)*2, nln+consts.ShareSize + switch ln { + default: + return 0, fmt.Errorf("ipld: wrong sized data written to the hasher") + case innerNodeSize: + n.tp = nmt.NodePrefix + case leafNodeSize: + n.tp = nmt.LeafPrefix + case innerNodeSize + typeSize: // w/ additional type byte + n.tp = nmt.NodePrefix + data = data[typeSize:] + case leafNodeSize + typeSize: // w/ additional type byte + n.tp = nmt.LeafPrefix + data = data[typeSize:] + } + + n.data = data + return len(n.data), nil +} + +// Sum computes the hash. +// Does not append the given suffix and violating the interface. +func (n *namespaceHasher) Sum([]byte) []byte { + isLeafData := n.tp == nmt.LeafPrefix + if isLeafData { + return n.Hasher.HashLeaf(n.data) + } + + flagLen := int(n.NamespaceLen * 2) + sha256Len := n.Hasher.Size() + return n.Hasher.HashNode(n.data[:flagLen+sha256Len], n.data[flagLen+sha256Len:]) +} diff --git a/ipld/plugin/namespace_hasher_test.go b/ipld/plugin/namespace_hasher_test.go new file mode 100644 index 0000000000..d23fd2fb6e --- /dev/null +++ b/ipld/plugin/namespace_hasher_test.go @@ -0,0 +1,67 @@ +package plugin + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/tendermint/tendermint/pkg/consts" +) + +func TestNamespaceHasherWrite(t *testing.T) { + leafSize := consts.ShareSize + consts.NamespaceSize + innerSize := nmtHashSize * 2 + tt := []struct { + name string + expectedSize int + writtenSize int + }{ + { + "Leaf", + leafSize, + leafSize, + }, + { + "Inner", + innerSize, + innerSize, + }, + { + "LeafAndType", + leafSize, + leafSize + typeSize, + }, + { + "InnerAndType", + innerSize, + innerSize + typeSize, + }, + } + + for _, ts := range tt { + t.Run("Success"+ts.name, func(t *testing.T) { + h := defaultHasher() + n, err := h.Write(make([]byte, ts.writtenSize)) + assert.NoError(t, err) + assert.Equal(t, ts.expectedSize, n) + assert.Equal(t, ts.expectedSize, len(h.data)) + }) + } + + t.Run("ErrorSecondWrite", func(t *testing.T) { + h := defaultHasher() + n, err := h.Write(make([]byte, leafSize)) + assert.NoError(t, err) + assert.Equal(t, leafSize, n) + + n, err = h.Write(make([]byte, leafSize)) + assert.Error(t, err) + assert.Equal(t, 0, n) + }) + + t.Run("ErrorIncorrectSize", func(t *testing.T) { + h := defaultHasher() + n, err := h.Write(make([]byte, 13)) + assert.Error(t, err) + assert.Equal(t, 0, n) + }) +} diff --git a/ipld/plugin/nmt.go b/ipld/plugin/nmt.go index c0700390a8..6b03096ae2 100644 --- a/ipld/plugin/nmt.go +++ b/ipld/plugin/nmt.go @@ -6,14 +6,12 @@ import ( "crypto/sha256" "errors" "fmt" - "hash" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-blockservice" "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" mh "github.com/multiformats/go-multihash" - mhcore "github.com/multiformats/go-multihash/core" "github.com/tendermint/tendermint/pkg/consts" "github.com/celestiaorg/nmt" @@ -23,61 +21,19 @@ const ( // Below used multiformats (one codec, one multihash) seem free: // https://github.com/multiformats/multicodec/blob/master/table.csv - // NmtCodec is the codec used for leaf and inner nodes of a Namespaced Merkle Tree. - NmtCodec = 0x7700 + // nmtCodec is the codec used for leaf and inner nodes of a Namespaced Merkle Tree. + nmtCodec = 0x7700 // Sha256Namespace8Flagged is the multihash code used to hash blocks // that contain an NMT node (inner and leaf nodes). - Sha256Namespace8Flagged = 0x7701 + sha256Namespace8Flagged = 0x7701 // nmtHashSize is the size of a digest created by an NMT in bytes. nmtHashSize = 2*consts.NamespaceSize + sha256.Size -) - -func init() { - mhcore.Register(Sha256Namespace8Flagged, func() hash.Hash { - return NewNamespaceHasher(nmt.NewNmtHasher(sha256.New(), nmt.DefaultNamespaceIDLen, true)) - }) -} - -type namespaceHasher struct { - *nmt.Hasher - tp byte - data []byte -} -func NewNamespaceHasher(hasher *nmt.Hasher) hash.Hash { - return &namespaceHasher{ - Hasher: hasher, - } -} - -func (n *namespaceHasher) Write(data []byte) (int, error) { - ln, nln, hln := len(data), int(n.NamespaceLen), n.Hash.Size() - innerNodeSize, leafNodeSize := (nln*2+hln)*2, nln+consts.ShareSize - switch ln { - default: - return 0, fmt.Errorf("wrong data size") - case innerNodeSize, innerNodeSize + 1: // w/ and w/o additional type byte - n.tp = nmt.NodePrefix - case leafNodeSize, leafNodeSize + 1: // w/ and w/o additional type byte - n.tp = nmt.LeafPrefix - } - - n.data = data[1:] - return ln, nil -} - -func (n *namespaceHasher) Sum([]byte) []byte { - isLeafData := n.tp == nmt.LeafPrefix - if isLeafData { - return n.Hasher.HashLeaf(n.data) - } - - flagLen := int(n.NamespaceLen * 2) - sha256Len := n.Hasher.Size() - return n.Hasher.HashNode(n.data[:flagLen+sha256Len], n.data[flagLen+sha256Len:]) -} + // typeSize defines the size of the serialized NMT Node type + typeSize = 1 +) func GetNode(ctx context.Context, bGetter blockservice.BlockGetter, root cid.Cid) (ipld.Node, error) { block, err := bGetter.GetBlock(ctx, root) @@ -93,8 +49,6 @@ func GetNode(ctx context.Context, bGetter blockservice.BlockGetter, root cid.Cid } func decodeBlock(block blocks.Block) (ipld.Node, error) { - // length of the domain separator for leaf and inner nodes: - const prefixOffset = 1 var ( leafPrefix = []byte{nmt.LeafPrefix} innerPrefix = []byte{nmt.NodePrefix} @@ -106,18 +60,18 @@ func decodeBlock(block blocks.Block) (ipld.Node, error) { Data: nil, }, nil } - domainSeparator := data[:prefixOffset] + domainSeparator := data[:typeSize] if bytes.Equal(domainSeparator, leafPrefix) { return &nmtLeafNode{ cid: block.Cid(), - Data: data[prefixOffset:], + Data: data[typeSize:], }, nil } if bytes.Equal(domainSeparator, innerPrefix) { return &nmtNode{ cid: block.Cid(), - l: data[prefixOffset : prefixOffset+nmtHashSize], - r: data[prefixOffset+nmtHashSize:], + l: data[typeSize : typeSize+nmtHashSize], + r: data[typeSize+nmtHashSize:], }, nil } return nil, fmt.Errorf( @@ -132,7 +86,6 @@ var _ ipld.Node = (*nmtNode)(nil) var _ ipld.Node = (*nmtLeafNode)(nil) type nmtNode struct { - // TODO(ismail): we might want to export these later cid cid.Cid l, r []byte } @@ -305,11 +258,11 @@ func CidFromNamespacedSha256(namespacedHash []byte) (cid.Cid, error) { if got, want := len(namespacedHash), nmtHashSize; got != want { return cid.Cid{}, fmt.Errorf("invalid namespaced hash length, got: %v, want: %v", got, want) } - buf, err := mh.Encode(namespacedHash, Sha256Namespace8Flagged) + buf, err := mh.Encode(namespacedHash, sha256Namespace8Flagged) if err != nil { return cid.Undef, err } - return cid.NewCidV1(NmtCodec, buf), nil + return cid.NewCidV1(nmtCodec, buf), nil } // MustCidFromNamespacedSha256 is a wrapper around cidFromNamespacedSha256 that panics @@ -320,7 +273,7 @@ func MustCidFromNamespacedSha256(hash []byte) cid.Cid { panic( fmt.Sprintf("malformed hash: %s, codec: %v", err, - mh.Codes[Sha256Namespace8Flagged]), + mh.Codes[sha256Namespace8Flagged]), ) } return cidFromHash