Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

codec/dagcbor: add DecodeOptions.ExperimentalDeterminism #390

Merged
merged 1 commit into from
Apr 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions codec/dagcbor/unmarshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,24 @@ const (
type DecodeOptions struct {
// If true, parse DAG-CBOR tag(42) as Link nodes, otherwise reject them
AllowLinks bool

// TODO: ExperimentalDeterminism enforces map key order, but not the other parts
// of the spec such as integers or floats. See the fuzz failures spotted in
// https://github.com/ipld/go-ipld-prime/pull/389.
// When we're done implementing strictness, deprecate the option in favor of
// StrictDeterminism, but keep accepting both for backwards compatibility.

// ExperimentalDeterminism requires decoded DAG-CBOR bytes to be canonical as per
// the spec. For example, this means that integers and floats be encoded in
// a particular way, and map keys be sorted.
//
// The decoder does not enforce this requirement by default, as the codec
// was originally implemented without these rules. Because of that, there's
// a significant amount of published data that isn't canonical but should
// still decode with the default settings for backwards compatibility.
//
// Note that this option is experimental as it only implements partial strictness.
ExperimentalDeterminism bool
}

// Decode deserializes data from the given io.Reader and feeds it into the given datamodel.NodeAssembler.
Expand Down Expand Up @@ -122,6 +140,7 @@ func unmarshal2(na datamodel.NodeAssembler, tokSrc shared.TokenSource, tk *tok.T
return err
}
observedLen := 0
lastKey := ""
for {
_, err := tokSrc.Step(tk)
if err != nil {
Expand All @@ -146,6 +165,12 @@ func unmarshal2(na datamodel.NodeAssembler, tokSrc shared.TokenSource, tk *tok.T
if observedLen > expectLen {
return fmt.Errorf("unexpected continuation of map elements beyond declared length")
}
if observedLen > 1 && options.ExperimentalDeterminism {
if len(lastKey) > len(tk.Str) || lastKey > tk.Str {
return fmt.Errorf("map key %q is not after %q as per RFC7049", tk.Str, lastKey)
}
}
lastKey = tk.Str
mva, err := ma.AssembleEntry(tk.Str)
if err != nil { // return in error if the key was rejected
return err
Expand Down
18 changes: 16 additions & 2 deletions node/bindnode/fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,17 @@ func FuzzBindnodeViaDagCBOR(f *testing.F) {
t.Logf("node in dag-json: %s", marshalDagJSON(t, node))
}

// Is nodeDagCBOR canonically encoded, i.e. strictly deterministic as
// per the DAG-CBOR spec? This matters for the re-encode checks below.
// Note that we want to use the non-strict decoder for fuzzing,
// as that default is what the vast majority users will use.
canonicalNodeDagCBOR := true
canonicalDecoder := dagcbor.DecodeOptions{AllowLinks: true, ExperimentalDeterminism: true}
if err := canonicalDecoder.Decode(basicnode.Prototype.Any.NewBuilder(), bytes.NewReader(nodeDagCBOR)); err != nil {
canonicalNodeDagCBOR = false
t.Logf("note that this node dag-cbor isn't canonical!")
}

ts := new(schema.TypeSystem)
ts.Init()
// For the time being, we're not interested in panics from
Expand Down Expand Up @@ -265,9 +276,12 @@ func FuzzBindnodeViaDagCBOR(f *testing.F) {
// so to get useful output, use reflect to dereference them.
t.Logf("decode successful: %#v", reflect.ValueOf(bindnode.Unwrap(node)).Elem().Interface())
reenc := marshalDagCBOR(t, node)
if !bytes.Equal(reenc, nodeDagCBOR) {
switch {
case canonicalNodeDagCBOR && !bytes.Equal(reenc, nodeDagCBOR):
t.Errorf("node reencoded as %X rather than %X", reenc, nodeDagCBOR)
} else {
case !canonicalNodeDagCBOR && bytes.Equal(reenc, nodeDagCBOR):
t.Errorf("node reencoded as %X even though it's not canonical", reenc)
default:
t.Logf("re-encode successful: %X", reenc)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
[]byte("\xa1etypes\xa1dRoot\xa1cmap\xa2gkeyTypefStringivalueTypecInt")
[]byte("\xa2ac0a00")