From 033cc4da768fff3d07de5e66e78b39908c212d60 Mon Sep 17 00:00:00 2001 From: Eric Myhre Date: Tue, 28 Sep 2021 18:13:58 +0200 Subject: [PATCH 1/3] traversal: implement monotonically decrementing budgets. It's optional, and currently not on by default, but easy to set. --- traversal/fns.go | 13 +++++++++++++ traversal/focus.go | 30 ++++++++++++++++++++++++++++++ traversal/walk.go | 18 ++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/traversal/fns.go b/traversal/fns.go index f9096780..a6fd81e7 100644 --- a/traversal/fns.go +++ b/traversal/fns.go @@ -36,6 +36,7 @@ type Progress struct { Path datamodel.Path Link datamodel.Link } + Budget *Budget // If present, tracks "budgets" for how many more steps we're willing to take before we should halt. } type Config struct { @@ -44,6 +45,18 @@ type Config struct { LinkTargetNodePrototypeChooser LinkTargetNodePrototypeChooser // Chooser for Node implementations to produce during automatic link traversal. } +type Budget struct { + // Fields below are described as "monotonically-decrementing", because that's what the traversal library will do with them, + // but they are user-accessable and can be reset to higher numbers again by code in the visitor callbacks. This is not recommended (why?), but possible. + + // If you set any budgets (by having a non-nil Progress.Budget field), you must set some value for all of them. + // Traversal halts when _any_ of the budgets reaches zero. + // The max value of an int (math.MaxInt64) is acceptable for any budget you don't care about. + + NodeBudget int64 // A monotonically-decrementing "budget" for how many more nodes we're willing to visit before halting. + LinkBudget int64 // A monotonically-decrementing "budget" for how many more links we're willing to load before halting. (This is not aware of any caching; it's purely in terms of links encountered and traversed.) +} + // LinkTargetNodePrototypeChooser is a function that returns a NodePrototype based on // the information in a Link and/or its LinkContext. // diff --git a/traversal/focus.go b/traversal/focus.go index 9f142d06..020722bc 100644 --- a/traversal/focus.go +++ b/traversal/focus.go @@ -87,6 +87,13 @@ func (prog *Progress) get(n datamodel.Node, p datamodel.Path, trackProgress bool segments := p.Segments() var prev datamodel.Node // for LinkContext for i, seg := range segments { + // Check the budget! + if prog.Budget != nil { + prog.Budget.NodeBudget-- + if prog.Budget.NodeBudget <= 0 { + return nil, fmt.Errorf("traversal budget for nodes visited exceeded") + } + } // Traverse the segment. switch n.Kind() { case datamodel.Kind_Invalid: @@ -112,6 +119,14 @@ func (prog *Progress) get(n datamodel.Node, p datamodel.Path, trackProgress bool } // Dereference any links. for n.Kind() == datamodel.Kind_Link { + // Check the budget! + if prog.Budget != nil { + prog.Budget.LinkBudget-- + if prog.Budget.LinkBudget <= 0 { + return nil, fmt.Errorf("traversal budget for links exceeded") + } + } + // Put together the context info we'll offer to the loader and prototypeChooser. lnk, _ := n.AsLink() lnkCtx := linking.LinkContext{ Ctx: prog.Cfg.Ctx, @@ -201,6 +216,13 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl return na.AssignNode(n2) } seg, p2 := p.Shift() + // Check the budget! + if prog.Budget != nil { + prog.Budget.NodeBudget-- + if prog.Budget.NodeBudget <= 0 { + return fmt.Errorf("traversal budget for nodes visited exceeded") + } + } // Special branch for if we've entered createParent mode in an earlier step. // This needs slightly different logic because there's no prior node to reference // (and we wouldn't want to waste time creating a dummy one). @@ -319,6 +341,14 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl } return la.Finish() case datamodel.Kind_Link: + // Check the budget! + if prog.Budget != nil { + prog.Budget.LinkBudget-- + if prog.Budget.LinkBudget <= 0 { + return fmt.Errorf("traversal budget for links exceeded") + } + } + // Put together the context info we'll offer to the loader and prototypeChooser. lnkCtx := linking.LinkContext{ Ctx: prog.Cfg.Ctx, LinkPath: prog.Path, diff --git a/traversal/walk.go b/traversal/walk.go index 343c0a40..84396fff 100644 --- a/traversal/walk.go +++ b/traversal/walk.go @@ -86,6 +86,14 @@ func (prog Progress) WalkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitF } func (prog Progress) walkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitFn) error { + // Check the budget! + if prog.Budget != nil { + prog.Budget.NodeBudget-- + if prog.Budget.NodeBudget <= 0 { + return fmt.Errorf("traversal budget for nodes visited exceeded") + } + } + // Decide if this node is matched -- do callbacks as appropriate. if s.Decide(n) { if err := fn(prog, n, VisitReason_SelectionMatch); err != nil { return err @@ -95,12 +103,14 @@ func (prog Progress) walkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitF return err } } + // If we're handling scalars (e.g. not maps and lists) we can return now. nk := n.Kind() switch nk { case datamodel.Kind_Map, datamodel.Kind_List: // continue default: return nil } + // For maps and lists: recurse (in one of two ways, depending on if the selector also states specific interests). attn := s.Interests() if attn == nil { return prog.walkAdv_iterateAll(n, s, fn) @@ -180,6 +190,14 @@ func (prog Progress) walkAdv_iterateSelective(n datamodel.Node, attn []datamodel } func (prog Progress) loadLink(v datamodel.Node, parent datamodel.Node) (datamodel.Node, error) { + // Check the budget! + if prog.Budget != nil { + prog.Budget.LinkBudget-- + if prog.Budget.LinkBudget <= 0 { + return nil, fmt.Errorf("traversal budget for links exceeded") + } + } + // Put together the context info we'll offer to the loader and prototypeChooser. lnk, err := v.AsLink() if err != nil { return nil, err From ed051886b987aaed426f204cdb97693222100948 Mon Sep 17 00:00:00 2001 From: Eric Myhre Date: Wed, 29 Sep 2021 01:25:32 +0200 Subject: [PATCH 2/3] traversal: budget tests, well-typed errors, more error info, and fix off-by-one. --- traversal/fns.go | 15 ++++++++++ traversal/focus.go | 18 ++++++------ traversal/walk.go | 16 +++++------ traversal/walk_test.go | 65 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 17 deletions(-) diff --git a/traversal/fns.go b/traversal/fns.go index a6fd81e7..773a91d9 100644 --- a/traversal/fns.go +++ b/traversal/fns.go @@ -2,6 +2,7 @@ package traversal import ( "context" + "fmt" "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" @@ -80,3 +81,17 @@ type SkipMe struct{} func (SkipMe) Error() string { return "skip" } + +type ErrBudgetExceeded struct { + BudgetKind string // "node"|"link" + Path datamodel.Path + Link datamodel.Link // only present if BudgetKind=="link" +} + +func (e *ErrBudgetExceeded) Error() string { + msg := fmt.Sprintf("traversal budget exceeded: budget for %ss reached zero as we reached path %q", e.BudgetKind, e.Path) + if e.Link != nil { + msg += fmt.Sprintf(" (link: %q)", e.Link) + } + return msg +} diff --git a/traversal/focus.go b/traversal/focus.go index 020722bc..1e901cc8 100644 --- a/traversal/focus.go +++ b/traversal/focus.go @@ -91,7 +91,7 @@ func (prog *Progress) get(n datamodel.Node, p datamodel.Path, trackProgress bool if prog.Budget != nil { prog.Budget.NodeBudget-- if prog.Budget.NodeBudget <= 0 { - return nil, fmt.Errorf("traversal budget for nodes visited exceeded") + return nil, &ErrBudgetExceeded{BudgetKind: "node", Path: prog.Path} } } // Traverse the segment. @@ -119,15 +119,15 @@ func (prog *Progress) get(n datamodel.Node, p datamodel.Path, trackProgress bool } // Dereference any links. for n.Kind() == datamodel.Kind_Link { + lnk, _ := n.AsLink() // Check the budget! if prog.Budget != nil { - prog.Budget.LinkBudget-- if prog.Budget.LinkBudget <= 0 { - return nil, fmt.Errorf("traversal budget for links exceeded") + return nil, &ErrBudgetExceeded{BudgetKind: "link", Path: prog.Path, Link: lnk} } + prog.Budget.LinkBudget-- } // Put together the context info we'll offer to the loader and prototypeChooser. - lnk, _ := n.AsLink() lnkCtx := linking.LinkContext{ Ctx: prog.Cfg.Ctx, LinkPath: p.Truncate(i), @@ -218,10 +218,10 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl seg, p2 := p.Shift() // Check the budget! if prog.Budget != nil { - prog.Budget.NodeBudget-- if prog.Budget.NodeBudget <= 0 { - return fmt.Errorf("traversal budget for nodes visited exceeded") + return &ErrBudgetExceeded{BudgetKind: "node", Path: prog.Path} } + prog.Budget.NodeBudget-- } // Special branch for if we've entered createParent mode in an earlier step. // This needs slightly different logic because there's no prior node to reference @@ -341,12 +341,13 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl } return la.Finish() case datamodel.Kind_Link: + lnk, _ := n.AsLink() // Check the budget! if prog.Budget != nil { - prog.Budget.LinkBudget-- if prog.Budget.LinkBudget <= 0 { - return fmt.Errorf("traversal budget for links exceeded") + return &ErrBudgetExceeded{BudgetKind: "link", Path: prog.Path, Link: lnk} } + prog.Budget.LinkBudget-- } // Put together the context info we'll offer to the loader and prototypeChooser. lnkCtx := linking.LinkContext{ @@ -355,7 +356,6 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl LinkNode: n, ParentNode: nil, // TODO inconvenient that we don't have this. maybe this whole case should be a helper function. } - lnk, _ := n.AsLink() // Pick what in-memory format we will build. np, err := prog.Cfg.LinkTargetNodePrototypeChooser(lnk, lnkCtx) if err != nil { diff --git a/traversal/walk.go b/traversal/walk.go index 84396fff..1f0fae10 100644 --- a/traversal/walk.go +++ b/traversal/walk.go @@ -88,10 +88,10 @@ func (prog Progress) WalkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitF func (prog Progress) walkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitFn) error { // Check the budget! if prog.Budget != nil { - prog.Budget.NodeBudget-- if prog.Budget.NodeBudget <= 0 { - return fmt.Errorf("traversal budget for nodes visited exceeded") + return &ErrBudgetExceeded{BudgetKind: "node", Path: prog.Path} } + prog.Budget.NodeBudget-- } // Decide if this node is matched -- do callbacks as appropriate. if s.Decide(n) { @@ -190,18 +190,18 @@ func (prog Progress) walkAdv_iterateSelective(n datamodel.Node, attn []datamodel } func (prog Progress) loadLink(v datamodel.Node, parent datamodel.Node) (datamodel.Node, error) { + lnk, err := v.AsLink() + if err != nil { + return nil, err + } // Check the budget! if prog.Budget != nil { - prog.Budget.LinkBudget-- if prog.Budget.LinkBudget <= 0 { - return nil, fmt.Errorf("traversal budget for links exceeded") + return nil, &ErrBudgetExceeded{BudgetKind: "link", Path: prog.Path, Link: lnk} } + prog.Budget.LinkBudget-- } // Put together the context info we'll offer to the loader and prototypeChooser. - lnk, err := v.AsLink() - if err != nil { - return nil, err - } lnkCtx := linking.LinkContext{ Ctx: prog.Cfg.Ctx, LinkPath: prog.Path, diff --git a/traversal/walk_test.go b/traversal/walk_test.go index 81c312c4..f68353ff 100644 --- a/traversal/walk_test.go +++ b/traversal/walk_test.go @@ -3,6 +3,7 @@ package traversal_test import ( "testing" + qt "github.com/frankban/quicktest" . "github.com/warpfork/go-wish" _ "github.com/ipld/go-ipld-prime/codec/dagjson" @@ -257,3 +258,67 @@ func TestWalkMatching(t *testing.T) { Wish(t, order, ShouldEqual, 7) }) } + +func TestWalkBudgets(t *testing.T) { + ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any) + t.Run("node-budget-halts", func(t *testing.T) { + ss := ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) { + efsb.Insert("foo", ssb.Matcher()) + efsb.Insert("bar", ssb.Matcher()) + }) + s, err := ss.Selector() + qt.Assert(t, err, qt.Equals, nil) + var order int + prog := traversal.Progress{} + prog.Budget = &traversal.Budget{ + NodeBudget: 2, // should reach root, then "foo", then stop. + } + err = prog.WalkMatching(middleMapNode, s, func(prog traversal.Progress, n datamodel.Node) error { + switch order { + case 0: + qt.Assert(t, n, qt.CmpEquals(), basicnode.NewBool(true)) + qt.Assert(t, prog.Path.String(), qt.Equals, "foo") + } + order++ + return nil + }) + qt.Check(t, order, qt.Equals, 1) // because it should've stopped early + qt.Assert(t, err, qt.Not(qt.Equals), nil) + qt.Check(t, err.Error(), qt.Equals, `traversal budget exceeded: budget for nodes reached zero as we reached path "bar"`) + }) + t.Run("link-budget-halts", func(t *testing.T) { + ss := ssb.ExploreAll(ssb.Matcher()) + s, err := ss.Selector() + qt.Assert(t, err, qt.Equals, nil) + var order int + lsys := cidlink.DefaultLinkSystem() + lsys.StorageReadOpener = (&store).OpenRead + err = traversal.Progress{ + Cfg: &traversal.Config{ + LinkSystem: lsys, + LinkTargetNodePrototypeChooser: basicnode.Chooser, + }, + Budget: &traversal.Budget{ + NodeBudget: 9000, + LinkBudget: 3, + }, + }.WalkMatching(middleListNode, s, func(prog traversal.Progress, n datamodel.Node) error { + switch order { + case 0: + qt.Assert(t, n, qt.CmpEquals(), basicnode.NewString("alpha")) + qt.Assert(t, prog.Path.String(), qt.Equals, "0") + case 1: + qt.Assert(t, n, qt.CmpEquals(), basicnode.NewString("alpha")) + qt.Assert(t, prog.Path.String(), qt.Equals, "1") + case 2: + qt.Assert(t, n, qt.CmpEquals(), basicnode.NewString("beta")) + qt.Assert(t, prog.Path.String(), qt.Equals, "2") + } + order++ + return nil + }) + qt.Check(t, order, qt.Equals, 3) + qt.Assert(t, err, qt.Not(qt.Equals), nil) + qt.Check(t, err.Error(), qt.Equals, `traversal budget exceeded: budget for links reached zero as we reached path "3" (link: "baguqeeyexkjwnfy")`) + }) +} From 3f112b45f55d565a8a13fe72d07fb0953ecfed10 Mon Sep 17 00:00:00 2001 From: Eric Myhre Date: Wed, 29 Sep 2021 14:10:43 +0200 Subject: [PATCH 3/3] traversal: tweak phrasing of error message Co-authored-by: Peter Rabbitson --- traversal/fns.go | 2 +- traversal/walk_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/traversal/fns.go b/traversal/fns.go index 773a91d9..fe616daf 100644 --- a/traversal/fns.go +++ b/traversal/fns.go @@ -89,7 +89,7 @@ type ErrBudgetExceeded struct { } func (e *ErrBudgetExceeded) Error() string { - msg := fmt.Sprintf("traversal budget exceeded: budget for %ss reached zero as we reached path %q", e.BudgetKind, e.Path) + msg := fmt.Sprintf("traversal budget exceeded: budget for %ss reached zero while on path %q", e.BudgetKind, e.Path) if e.Link != nil { msg += fmt.Sprintf(" (link: %q)", e.Link) } diff --git a/traversal/walk_test.go b/traversal/walk_test.go index f68353ff..db456a55 100644 --- a/traversal/walk_test.go +++ b/traversal/walk_test.go @@ -284,7 +284,7 @@ func TestWalkBudgets(t *testing.T) { }) qt.Check(t, order, qt.Equals, 1) // because it should've stopped early qt.Assert(t, err, qt.Not(qt.Equals), nil) - qt.Check(t, err.Error(), qt.Equals, `traversal budget exceeded: budget for nodes reached zero as we reached path "bar"`) + qt.Check(t, err.Error(), qt.Equals, `traversal budget exceeded: budget for nodes reached zero while on path "bar"`) }) t.Run("link-budget-halts", func(t *testing.T) { ss := ssb.ExploreAll(ssb.Matcher()) @@ -319,6 +319,6 @@ func TestWalkBudgets(t *testing.T) { }) qt.Check(t, order, qt.Equals, 3) qt.Assert(t, err, qt.Not(qt.Equals), nil) - qt.Check(t, err.Error(), qt.Equals, `traversal budget exceeded: budget for links reached zero as we reached path "3" (link: "baguqeeyexkjwnfy")`) + qt.Check(t, err.Error(), qt.Equals, `traversal budget exceeded: budget for links reached zero while on path "3" (link: "baguqeeyexkjwnfy")`) }) }