From 348eca4fd1ab02a46b08d474fcad65eba57b18fb Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 7 Dec 2022 17:44:28 +0000 Subject: [PATCH 01/54] compact btree alloactor --- state/aggregator_test.go | 63 +++ state/btree_index.go | 852 +++++++++++++++++++++++++++++++++++++++ state/domain.go | 1 + 3 files changed, 916 insertions(+) create mode 100644 state/btree_index.go diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 55fd43e7f..050e5f027 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -424,3 +424,66 @@ func Test_EncodeCommitmentState(t *testing.T) { require.EqualValues(t, cs.txNum, dec.txNum) require.EqualValues(t, cs.trieState, dec.trieState) } + +func Test_BtreeIndex(t *testing.T) { + count := uint64(2000000) + M := uint64(2048) + bt := newBtAlloc(count, M) + + for i := uint64(0); i < count; i++ { + bt.data[i] = uint64(i) + } + + //bt.traverse() + //bt.traverseTrick() + bt.traverseDfs() + + bt.printSearchMx() + + bt.findNode(16393) +} + +func Test_BtreeIndex_Allocation(t *testing.T) { + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + m := 2 + for i := 5; i < 24; i++ { + t.Run(fmt.Sprintf("%d", m< b { + return a + } + return b +} + +func newBtAlloc(k, M uint64) *btAlloc { + d := logBase(k, M) + m := max64(2, M>>1) + + fmt.Printf("k=%d d=%d, M=%d m=%d\n", k, d, M, m) + a := &btAlloc{ + vx: make([]uint64, d+1), + sons: make([][]uint64, d+1), + cursors: make([]cur, d), + nodes: make([][]node, d), + data: make([]uint64, k), + M: M, + K: k, + d: d, + } + a.vx[0] = 1 + a.vx[d] = k + + nnc := func(vx uint64) uint64 { + return uint64(math.Ceil(float64(vx) / float64(M))) + } + + for i := a.d - 1; i > 0; i-- { + nnc := uint64(math.Ceil(float64(a.vx[i+1]) / float64(M))) + //nvc := uint64(math.Floor(float64(a.vx[i+1]) / float64(m))-1) + //nnc := a.vx[i+1] / M + //nvc := a.vx[i+1] / m + //bvc := a.vx[i+1] / (m + (m >> 1)) + //_, _ = nvc, nnc + a.vx[i] = min64(uint64(math.Pow(float64(M), float64(i))), nnc) + } + + ncount := uint64(0) + pnv := uint64(0) + for l := a.d - 1; l > 0; l-- { + s := nnc(a.vx[l+1]) + //left := a.vx[l+1] % M + //if left > 0 { + // if left < m { + // s-- + // newPrev := M - (m - left) + // dp := M - newPrev + // a.sons[l] = append(a.sons[l], 1, newPrev, 1, left+dp) + // } else { + // a.sons[l] = append(a.sons[l], 1, left) + // } + //} + a.sons[l] = append(a.sons[l], s, M) + for ik := 0; ik < len(a.sons[l]); ik += 2 { + ncount += a.sons[l][ik] * a.sons[l][ik+1] + if l == 1 { + pnv += a.sons[l][ik] + } + } + } + a.sons[0] = []uint64{1, pnv} + ncount += a.sons[0][0] * a.sons[0][1] // last one + a.N = ncount + fmt.Printf("ncount=%d ∂%.5f\n", ncount, float64(a.N-uint64(k))/float64(a.N)) + + for i, v := range a.sons { + fmt.Printf("L%d=%v\n", i, v) + } + + return a +} + +type cur struct { + l, p, di, si uint64 + + //l - level + //p - pos inside level + //si - current, actual son index + //di - data array index +} + +type node struct { + p, d, s, fc uint64 +} + +func (a *btAlloc) traverseTrick() { + for l := 0; l < len(a.sons)-1; l++ { + if len(a.sons[l]) < 2 { + panic("invalid btree allocation markup") + } + a.cursors[l] = cur{uint64(l), 1, 0, 0} + a.nodes[l] = make([]node, 0) + } + + lf := a.cursors[len(a.cursors)-1] + c := a.cursors[(len(a.cursors) - 2)] + + var d uint64 + var fin bool + + lf.di = d + lf.si++ + d++ + a.cursors[len(a.cursors)-1] = lf + + moved := true + for int(c.p) <= len(a.sons[c.l]) { + if fin || d > a.K { + break + } + c, lf = a.cursors[c.l], a.cursors[lf.l] + + c.di = d + c.si++ + + sons := a.sons[lf.l][lf.p] + 
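	// a.sons[level] holds (nodeCount, childrenPerNode) pairs built in newBtAlloc, so
	// `sons` here is the fan-out of the current leaf-level node; the loop below steps
	// through its remaining children, advancing the running data index d for each key.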
for i := uint64(1); i < sons; i++ { + lf.si++ + d++ + } + lf.di = d + d++ + + a.nodes[lf.l] = append(a.nodes[lf.l], node{p: lf.p, s: lf.si, d: lf.di}) + a.nodes[c.l] = append(a.nodes[c.l], node{p: c.p, s: c.si, d: c.di}) + a.cursors[lf.l] = lf + a.cursors[c.l] = c + + for l := lf.l; l >= 0; l-- { + sc := a.cursors[l] + sons, gsons := a.sons[sc.l][sc.p-1], a.sons[sc.l][sc.p] + if l < c.l && moved { + sc.di = d + a.nodes[sc.l] = append(a.nodes[sc.l], node{d: sc.di}) + sc.si++ + d++ + } + moved = (sc.si-1)/gsons != sc.si/gsons + if sc.si/gsons >= sons { + sz := uint64(len(a.sons[sc.l]) - 1) + if sc.p+2 > sz { + fin = l == lf.l + break + } else { + sc.p += 2 + sc.si, sc.di = 0, 0 + } + //moved = true + } + if l == lf.l { + sc.si++ + sc.di = d + d++ + } + a.cursors[l] = sc + if l == 0 { + break + } + } + moved = false + } +} + +func (a *btAlloc) traverseDfs() { + for l := 0; l < len(a.sons)-1; l++ { + if len(a.sons[l]) < 2 { + panic("invalid btree allocation markup") + } + a.cursors[l] = cur{uint64(l), 1, 0, 0} + a.nodes[l] = make([]node, 0) + } + + c := a.cursors[len(a.cursors)-1] + pc := a.cursors[(len(a.cursors) - 2)] + root := new(node) + trace := false + + var di uint64 + for stop := false; !stop; { + // fill leaves, mark parent if needed (until all grandparents not marked up until root) + // check if eldest parent has brothers + // -- has bros -> fill their leaves from the bottom + // -- no bros -> shift cursor (tricky) + if di > a.K { + a.N = di - 1 // actually filled node count + break + } + + bros, parents := a.sons[c.l][c.p], a.sons[c.l][c.p-1] + for i := uint64(0); i < bros; i++ { + c.di = di + if trace { + fmt.Printf("L%d |%d| d %2d s %2d\n", c.l, c.p, c.di, c.si) + } + c.si++ + di++ + + if i == 0 { + pc.di = di + if trace { + fmt.Printf("P%d |%d| d %2d s %2d\n", pc.l, pc.p, pc.di, pc.si) + } + pc.si++ + di++ + } + } + + a.nodes[c.l] = append(a.nodes[c.l], node{p: c.p, d: c.di, s: c.si}) + a.nodes[pc.l] = append(a.nodes[pc.l], node{p: pc.p, d: pc.di, s: pc.si, fc: uint64(len(a.nodes[c.l]) - 1)}) + + pid := c.si / bros + if pid >= parents { + if c.p+2 >= uint64(len(a.sons[c.l])) { + stop = true // end of row + if trace { + fmt.Printf("F%d |%d| d %2d\n", c.l, c.p, c.di) + } + } else { + c.p += 2 + c.si = 0 + c.di = 0 + } + } + a.cursors[c.l] = c + a.cursors[pc.l] = pc + + for l := pc.l; l >= 0; l-- { + pc := a.cursors[l] + uncles := a.sons[pc.l][pc.p] + grands := a.sons[pc.l][pc.p-1] + + pi1 := pc.si / uncles + pc.si++ + pc.di = 0 + + pi2 := pc.si / uncles + moved := pi2-pi1 != 0 + + switch { + case pc.l > 0: + gp := a.cursors[pc.l-1] + if gp.di == 0 { + gp.di = di + di++ + if trace { + fmt.Printf("P%d |%d| d %2d s %2d\n", gp.l, gp.p, gp.di, gp.si) + } + a.nodes[gp.l] = append(a.nodes[gp.l], node{p: gp.p, d: gp.di, s: gp.si, fc: uint64(len(a.nodes[l]) - 1)}) + a.cursors[gp.l] = gp + } + default: + if root.d == 0 { + root.d = di + //di++ + if trace { + fmt.Printf("ROOT | d %2d\n", root.d) + } + } + } + + //fmt.Printf("P%d |%d| d %2d s %2d pid %d\n", pc.l, pc.p, pc.di, pc.si-1) + if pi2 >= grands { // skip one step of si due to different parental filling order + if pc.p+2 >= uint64(len(a.sons[pc.l])) { + if trace { + fmt.Printf("EoRow %d |%d|\n", pc.l, pc.p) + } + break // end of row + } + //fmt.Printf("N %d d%d s%d\n", pc.l, pc.di, pc.si) + //fmt.Printf("P%d |%d| d %2d s %2d pid %d\n", pc.l, pc.p, pc.di, pc.si, pid) + pc.p += 2 + pc.si = 0 + pc.di = 0 + } + a.cursors[pc.l] = pc + + if !moved { + break + } + } + } +} + +func (a *btAlloc) traverse() { + var sum uint64 + for l := 0; l < 
len(a.sons)-1; l++ { + if len(a.sons[l]) < 2 { + panic("invalid btree allocation markup") + } + a.cursors[l] = cur{uint64(l), 1, 0, 0} + + for i := 0; i < len(a.sons[l]); i += 2 { + sum += a.sons[l][i] * a.sons[l][i+1] + } + a.nodes[l] = make([]node, 0) + } + fmt.Printf("nodes total %d\n", sum) + + c := a.cursors[len(a.cursors)-1] + + var di uint64 + for stop := false; !stop; { + bros := a.sons[c.l][c.p] + parents := a.sons[c.l][c.p-1] + + // fill leaves, mark parent if needed (until all grandparents not marked up until root) + // check if eldest parent has brothers + // -- has bros -> fill their leaves from the bottom + // -- no bros -> shift cursor (tricky) + + for i := uint64(0); i < bros; i++ { + c.di = di + fmt.Printf("L%d |%d| d %2d s %2d\n", c.l, c.p, c.di, c.si) + c.si++ + di++ + } + + pid := c.si / bros + if pid >= parents { + if c.p+2 >= uint64(len(a.sons[c.l])) { + stop = true // end of row + fmt.Printf("F%d |%d| d %2d\n", c.l, c.p, c.di) + } else { + //fmt.Printf("N %d d%d s%d\n", c.l, c.di, c.si) + //a.nodes[c.l] = append(a.nodes[c.l], node{p: c.p, d: c.di, s: c.si}) + c.p += 2 + c.si = 0 + c.di = 0 + } + } + a.cursors[c.l] = c + + for l := len(a.cursors) - 2; l >= 0; l-- { + pc := a.cursors[l] + uncles := a.sons[pc.l][pc.p] + grands := a.sons[pc.l][pc.p-1] + + pi1 := pc.si / uncles + pc.si++ + pi2 := pc.si / uncles + moved := pi2-pi1 != 0 + pc.di = di + fmt.Printf("P%d |%d| d %2d s %2d pid %d\n", pc.l, pc.p, pc.di, pc.si-1, pid) + a.nodes[pc.l] = append(a.nodes[pc.l], node{p: pc.p, d: pc.di, s: pc.si}) + + di++ + + if pi2 >= grands { // skip one step of si due to different parental filling order + if pc.p+2 >= uint64(len(a.sons[pc.l])) { + // end of row + fmt.Printf("E%d |%d| d %2d\n", pc.l, pc.p, pc.di) + break + } + //fmt.Printf("N %d d%d s%d\n", pc.l, pc.di, pc.si) + //fmt.Printf("P%d |%d| d %2d s %2d pid %d\n", pc.l, pc.p, pc.di, pc.si, pid) + pc.p += 2 + pc.si = 0 + pc.di = 0 + } + a.cursors[pc.l] = pc + + if l >= 1 && a.cursors[l-1].di == 0 { + continue + } + if !moved { + break + } + } + } +} + +func (a *btAlloc) fetchByDi(i uint64) (uint64, bool) { + if int(i) >= len(a.data) { + return 0, true + } + return a.data[i], false +} + +func binsearch(a []node, x uint64) uint64 { + l, r := uint64(0), uint64(len(a)) + for l < r { + mid := (l + r) / 2 + if a[mid].d < x { + l = mid + 1 + } else { + r = mid + } + } + return l +} + +func (a *btAlloc) bs(i, x, l, r uint64, direct bool) (uint64, uint64, bool) { + var exit bool + var di uint64 + for l <= r { + m := (l + r) >> 1 + if l == r { + m = l + exit = true + } + + switch direct { + case true: + di = a.data[m] + case false: + di = a.nodes[i][m].d + } + + mkey, nf := a.fetchByDi(di) + a.naccess++ + switch { + case nf: + break + case mkey == x: + return m, r, true + case mkey < x: + if exit { + break + } + if m+1 == r { + if m > 0 { + m-- + } + return m, r, false + } + l = m + 1 + default: + if exit { + break + } + if m-l == 1 && l > 0 { + return l - 1, r, false + } + r = m + } + if exit { + break + } + } + return l, r, false +} + +func (a *btAlloc) bsNode(i, x, l, r uint64, direct bool) (*node, uint64) { + var exit bool + var di, lm uint64 + n := new(node) + + for l <= r { + m := (l + r) >> 1 + if l == r { + m = l + exit = true + } + lm = m + + switch direct { + case true: + di = a.data[m] + case false: + di = a.nodes[i][m].d + n = &a.nodes[i][m] + } + + mkey, nf := a.fetchByDi(di) + a.naccess++ + switch { + case nf: + break + case mkey == x: + return n, m + case mkey < x: + //if exit { + // break + //} + if m+1 == r { + return 
n, m + } + l = m + default: + //if exit { + // break + //} + if m == l { + return n, m + } + r = m + } + if exit { + break + } + } + return nil, lm +} + +type pt struct { + l, n uint64 +} + +func (a *btAlloc) findNode(ik uint64) *node { + var L, m uint64 + R := uint64(len(a.nodes[0]) - 1) + + lhn := new(node) + for l, level := range a.nodes { + lhn, m = a.bsNode(uint64(l), ik, L, R, false) + if lhn == nil { + L = 0 + fmt.Printf("found nil key %d lvl=%d naccess=%d\n", level[m].d, l, a.naccess) + break + } + + k := lhn.d + //k, found := a.fetchByDi(lhn.d) + if k > ik { + if lhn.fc > 0 { + L = lhn.fc - 1 + } else { + L = 0 + } + } else if k == ik { + fmt.Printf("found key %d naccess=%d\n", level[m].d, a.naccess) + //return true + break + } else { + if m < uint64(len(level)) { + R = level[m+1].fc + //R = level[m].fc + } else { + R = uint64(len(a.nodes[l+1]) - 1) + } + } + fmt.Printf("range={%+v} (%d, %d) L=%d naccess=%d\n", lhn, L, R, l, a.naccess) + } + + if ik < lhn.d { + L = 0 + } else if ik == lhn.d { + fmt.Printf("last found key %d naccess=%d\n", lhn.d, a.naccess) + return nil + } else { + L = lhn.d + } + + a.naccess = 0 + mk, _, found := a.bs(a.d-1, ik, L, a.nodes[a.d-1][m+1].d, true) + if found { + //if trace { + fmt.Printf("last found key %d naccess=%d\n", mk, a.naccess) + //} + //return true + } + + return nil +} + +func (a *btAlloc) lookup(ik uint64) bool { + trace, direct, found := true, false, false + mk, l, r := uint64(0), uint64(0), uint64(len(a.nodes[0])-1) + + for i := 0; i < len(a.nodes); i++ { + mk, r, found = a.bs(uint64(i), ik, l, r, direct) + if found { + if trace { + fmt.Printf("found key %d naccess=%d\n", a.nodes[i][mk].d, a.naccess) + } + return true + } + if trace { + fmt.Printf("range={%d,%d} (%d, %d) L=%d naccess=%d\n", a.nodes[i][l].d, a.nodes[i][r].d, l, r, i, a.naccess) + } + + l, r = a.nodes[i][mk].fc, a.nodes[i][r].fc + if trace && i < len(a.nodes)-1 { + fmt.Printf("next range={%d,%d} (%d, %d) L=%d naccess=%d\n", a.nodes[i+1][l].d, a.nodes[i+1][r].d, l, r, i+1, a.naccess) + } + } + + mindi, maxdi := uint64(0), a.nodes[a.d-1][l+1].d + if l > 0 { + mindi = a.nodes[a.d-1][l-1].d + } + if trace { + fmt.Printf("smallest range {%d-%d} (%d-%d)\n", mindi, maxdi, l-1, l+1) + } + + // search in smallest found interval + direct = true + mk, _, found = a.bs(a.d-1, ik, mindi, maxdi, direct) + if found { + if trace { + fmt.Printf("last found key %d naccess=%d\n", mk, a.naccess) + } + return true + } + + return false +} + +func (a *btAlloc) search(ik uint64) bool { + l, r := uint64(0), uint64(len(a.nodes[0])) + lr, hr := uint64(0), a.N + var naccess int64 + var trace bool + for i := 0; i < len(a.nodes); i++ { + for l < r { + m := (l + r) >> 1 + mkey, nf := a.fetchByDi(a.nodes[i][m].d) + naccess++ + if nf { + break + } + if mkey < ik { + lr = mkey + l = m + 1 + } else if mkey == ik { + if trace { + fmt.Printf("found key %d @%d naccess=%d\n", mkey, m, naccess) + } + return true //mkey + } else { + r = m + hr = mkey + } + } + if trace { + fmt.Printf("range={%d,%d} L=%d naccess=%d\n", lr, hr, i, naccess) + } + if i == len(a.nodes) { + if trace { + fmt.Printf("%d|%d - %d|%d\n", l, a.nodes[i][l].d, r, a.nodes[i][r].d) + } + return true + } + if i+1 >= len(a.nodes) { + break + } + l = binsearch(a.nodes[i+1], lr) + r = binsearch(a.nodes[i+1], hr) + } + + if trace { + fmt.Printf("smallest range %d-%d (%d-%d)\n", lr, hr, l, r) + } + if l == r && l > 0 { + l-- + } + + lr, hr = a.nodes[a.d-1][l].d, a.nodes[a.d-1][r].d + // search in smallest found interval + for lr < hr { + m := (lr + hr) 
>> 1 + mkey, nf := a.fetchByDi(m) + naccess++ + if nf { + break + } + if mkey < ik { + //lr = mkey + lr = m + 1 + } else if mkey == ik { + if trace { + fmt.Printf("last found key %d @%d naccess=%d\n", mkey, m, naccess) + } + return true //mkey + } else { + hr = m + //hr = mkey + } + } + + return false +} + +func (a *btAlloc) printSearchMx() { + for i, n := range a.nodes { + fmt.Printf("D%d |%d| ", i, len(n)) + for _, s := range n { + fmt.Printf("%d ", s.d) + } + fmt.Printf("\n") + } +} + +func OpenBtreeIndex(indexPath string) (*BtIndex, error) { + s, err := os.Stat(indexPath) + if err != nil { + return nil, err + } + + idx := &BtIndex{ + filePath: indexPath, + size: s.Size(), + modTime: s.ModTime(), + //idx: btree.NewG[uint64](32, commitmentItemLess), + } + + idx.file, err = os.Open(indexPath) + if err != nil { + return nil, err + } + + if idx.mmapUnix, idx.mmapWin, err = mmap.Mmap(idx.file, int(idx.size)); err != nil { + return nil, err + } + idx.data = idx.mmapUnix[:idx.size] + // Read number of keys and bytes per record + idx.baseDataID = binary.BigEndian.Uint64(idx.data[:8]) + idx.keyCount = binary.BigEndian.Uint64(idx.data[8:16]) + return idx, nil +} + +func (b *BtIndex) Size() int64 { return b.size } + +func (b *BtIndex) ModTime() time.Time { return b.modTime } + +func (b *BtIndex) BaseDataID() uint64 { return b.baseDataID } + +func (b *BtIndex) FilePath() string { return b.filePath } + +func (b *BtIndex) FileName() string { return path.Base(b.filePath) } + +func (b *BtIndex) Empty() bool { return b.keyCount == 0 } + +func (b *BtIndex) KeyCount() uint64 { return b.keyCount } + +func (b *BtIndex) Close() error { + if b == nil { + return nil + } + if err := mmap.Munmap(b.mmapUnix, b.mmapWin); err != nil { + return err + } + if err := b.file.Close(); err != nil { + return err + } + return nil +} + +func (b *BtIndex) Lookup(bucketHash, fingerprint uint64) uint64 { + //TODO implement me + panic("implement me") +} + +func (b *BtIndex) OrdinalLookup(i uint64) uint64 { + //TODO implement me + panic("implement me") +} + +func (b *BtIndex) ExtractOffsets() map[uint64]uint64 { + //TODO implement me + panic("implement me") +} + +func (b *BtIndex) RewriteWithOffsets(w *bufio.Writer, m map[uint64]uint64) error { + //TODO implement me + panic("implement me") +} + +func (b *BtIndex) DisableReadAhead() { + //TODO implement me + panic("implement me") +} + +func (b *BtIndex) EnableReadAhead() *interface{} { + //TODO implement me + panic("implement me") +} + +func (b *BtIndex) EnableMadvNormal() *interface{} { + //TODO implement me + panic("implement me") +} + +func (b *BtIndex) EnableWillNeed() *interface{} { + //TODO implement me + panic("implement me") +} diff --git a/state/domain.go b/state/domain.go index 5fe17d9bc..01a2392e4 100644 --- a/state/domain.go +++ b/state/domain.go @@ -54,6 +54,7 @@ var ( type filesItem struct { decompressor *compress.Decompressor index *recsplit.Index + bindex *BtIndex startTxNum uint64 endTxNum uint64 From 6eb898eb52935045c308a58ff2cc6d3880443df8 Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 27 Jan 2023 18:16:18 +0000 Subject: [PATCH 02/54] add index building code --- state/aggregator_test.go | 100 +++++- state/btree_index.go | 720 ++++++++++++++++++++++++++++++++------- 2 files changed, 676 insertions(+), 144 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 050e5f027..0f664e805 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -6,14 +6,17 @@ import ( "fmt" "math/rand" "os" + "path" "path/filepath" 
"sync/atomic" "testing" + "time" "github.com/holiman/uint256" "github.com/ledgerwatch/log/v3" "github.com/stretchr/testify/require" + "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/length" "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/erigon-lib/kv/mdbx" @@ -425,22 +428,96 @@ func Test_EncodeCommitmentState(t *testing.T) { require.EqualValues(t, cs.trieState, dec.trieState) } -func Test_BtreeIndex(t *testing.T) { - count := uint64(2000000) - M := uint64(2048) - bt := newBtAlloc(count, M) +func Test_BtreeIndex_Seek(t *testing.T) { + tmp := t.TempDir() + args := BtIndexWriterArgs{ + Enums: true, + IndexFile: path.Join(tmp, "1M.bt"), + TmpDir: tmp, + KeyCount: 1_000, + EtlBufLimit: 0, + Salt: 0, + } + iw, err := NewBtIndexWriter(args) + require.NoError(t, err) + + defer iw.Close() + defer os.RemoveAll(tmp) + + rnd := rand.New(rand.NewSource(0)) + keys := make([]byte, 52) + lookafter := make([][]byte, 0) + for i := 0; i < args.KeyCount; i++ { + n, err := rnd.Read(keys[:52]) + require.EqualValues(t, n, 52) + require.NoError(t, err) + + err = iw.AddKey(keys[:], uint64(i)) + require.NoError(t, err) - for i := uint64(0); i < count; i++ { - bt.data[i] = uint64(i) + if i%1000 < 5 { + lookafter = append(lookafter, common.Copy(keys)) + } } - //bt.traverse() - //bt.traverseTrick() - bt.traverseDfs() + require.NoError(t, iw.Build()) + iw.Close() + + bt, err := OpenBtreeIndex(args.IndexFile, 4) + require.NoError(t, err) + require.EqualValues(t, bt.KeyCount(), args.KeyCount) - bt.printSearchMx() + idx := NewBtIndexReader(bt) + + for i := 0; i < len(lookafter); i += 5 { + cur, err := idx.Seek(lookafter[i]) + require.NoError(t, err) + //require.EqualValues(t, lookafter[i], cur.key) + require.EqualValues(t, uint64(i), cur.Value()) + for j := 0; j < 5; j++ { + //require.EqualValues(t, lookafter[i+j], idx.Key()) + require.EqualValues(t, uint64(i+j), cur.Value()) + cur.Next() + } + } - bt.findNode(16393) + bt.Close() +} + +func Test_InitBtreeIndex(t *testing.T) { + tmp := t.TempDir() + args := BtIndexWriterArgs{ + Enums: true, + IndexFile: path.Join(tmp, "100k.bt"), + TmpDir: tmp, + KeyCount: 100, + EtlBufLimit: 0, + Salt: 0, + } + iw, err := NewBtIndexWriter(args) + require.NoError(t, err) + + defer iw.Close() + defer os.RemoveAll(tmp) + + rnd := rand.New(rand.NewSource(0)) + keys := make([]byte, 52) + for i := 0; i < args.KeyCount; i++ { + n, err := rnd.Read(keys[:52]) + require.EqualValues(t, n, 52) + require.NoError(t, err) + + err = iw.AddKey(keys[:], uint64(i)) + require.NoError(t, err) + } + + require.NoError(t, iw.Build()) + iw.Close() + + bt, err := OpenBtreeIndex(args.IndexFile, 4) + require.NoError(t, err) + require.EqualValues(t, bt.KeyCount(), args.KeyCount) + bt.Close() } func Test_BtreeIndex_Allocation(t *testing.T) { @@ -486,4 +563,3 @@ func Benchmark_BtreeIndex_Search(b *testing.B) { bt.search(uint64(i % max)) } } - diff --git a/state/btree_index.go b/state/btree_index.go index 43cc652a3..feff45dc6 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -2,49 +2,149 @@ package state import ( "bufio" + "bytes" + "context" + "crypto/rand" "encoding/binary" "fmt" "math" + "math/bits" "os" "path" + "path/filepath" "time" - "github.com/google/btree" + "github.com/c2h5oh/datasize" + "github.com/ledgerwatch/log/v3" + "github.com/ledgerwatch/erigon-lib/common" + "github.com/ledgerwatch/erigon-lib/common/length" + "github.com/ledgerwatch/erigon-lib/etl" "github.com/ledgerwatch/erigon-lib/mmap" + 
"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32" ) -type BtIndex struct { - bt *btree.BTreeG[uint64] - mmapWin *[mmap.MaxMapSize]byte - mmapUnix []byte - data []byte - file *os.File - size int64 - modTime time.Time - filePath string - keyCount uint64 - baseDataID uint64 +func logBase(n, base uint64) uint64 { + return uint64(math.Ceil(math.Log(float64(n)) / math.Log(float64(base)))) +} + +func min64(a, b uint64) uint64 { + if a < b { + return a + } + return b +} + +func max64(a, b uint64) uint64 { + if a > b { + return a + } + return b +} + +type markupCursor struct { + l, p, di, si uint64 + //l - level + //p - pos inside level + //si - current, actual son index + //di - data array index +} + +type node struct { + p, d, s, fc uint64 + key []byte + val []byte +} + +type key struct { + bucket, fprint uint64 } -type page struct { - i uint64 - keys uint64 - size uint64 - nodes []*node +func bytesToKey(b []byte) key { + if len(b) > 16 { + panic(fmt.Errorf("invalid size of key bytes to convert (size %d)", len(b))) + } + return key{ + bucket: binary.BigEndian.Uint64(b), + fprint: binary.BigEndian.Uint64(b[8:]), + } } -type inode struct { - page *page - node *node +func (k key) compare(k2 key) int { + if k.bucket < k2.bucket { + return -1 + } + if k.bucket > k2.bucket { + return 1 + } + if k.fprint < k2.fprint { + return -1 + } + if k.fprint > k2.fprint { + return 1 + } + return 0 } -type cursor struct { - stack []inode +func (k key) Bytes() []byte { + buf := make([]byte, 16) + binary.BigEndian.PutUint64(buf[:8], k.bucket) + binary.BigEndian.PutUint64(buf[8:], k.fprint) + return buf } -func isEven(n uint64) bool { - return n&1 == 0 +// deprecated +func binsearch(a []node, x uint64) uint64 { + l, r := uint64(0), uint64(len(a)) + for l < r { + mid := (l + r) / 2 + if a[mid].d < x { + l = mid + 1 + } else { + r = mid + } + } + return l +} + +type Cursor struct { + ctx context.Context + ix *BtIndex + + key []byte + value []byte + d uint64 +} + +func newCursor(ctx context.Context, k, v []byte, d uint64) *Cursor { + return &Cursor{ + ctx: ctx, + key: k, + value: v, + d: d, + } +} + +func (c *Cursor) Key() []byte { + return c.key +} + +func (c *Cursor) Value() []byte { + return c.value +} + +func (c *Cursor) Next() bool { + if c.d+1 >= c.ix.KeyCount() { + return false + } + k, v, err := c.ix.dataLookup(c.d + 1) + if err != nil { + return false + } + c.key = common.Copy(k) + c.value = common.Copy(v) + c.d++ + return true } type btAlloc struct { @@ -54,28 +154,12 @@ type btAlloc struct { K uint64 vx []uint64 // vertex count on level sons [][]uint64 // i - level; 0 <= i < d; j_k - amount, j_k+1 - child count - cursors []cur + cursors []markupCursor nodes [][]node data []uint64 naccess uint64 -} -func logBase(n, base uint64) uint64 { - return uint64(math.Ceil(math.Log(float64(n)) / math.Log(float64(base)))) -} - -func min64(a, b uint64) uint64 { - if a < b { - return a - } - return b -} - -func max64(a, b uint64) uint64 { - if a > b { - return a - } - return b + dataLookup func(di uint64) ([]byte, []byte, error) } func newBtAlloc(k, M uint64) *btAlloc { @@ -86,7 +170,7 @@ func newBtAlloc(k, M uint64) *btAlloc { a := &btAlloc{ vx: make([]uint64, d+1), sons: make([][]uint64, d+1), - cursors: make([]cur, d), + cursors: make([]markupCursor, d), nodes: make([][]node, d), data: make([]uint64, k), M: M, @@ -145,25 +229,12 @@ func newBtAlloc(k, M uint64) *btAlloc { return a } -type cur struct { - l, p, di, si uint64 - - //l - level - //p - pos inside level - //si - current, actual son index - //di - data 
array index -} - -type node struct { - p, d, s, fc uint64 -} - func (a *btAlloc) traverseTrick() { for l := 0; l < len(a.sons)-1; l++ { if len(a.sons[l]) < 2 { panic("invalid btree allocation markup") } - a.cursors[l] = cur{uint64(l), 1, 0, 0} + a.cursors[l] = markupCursor{uint64(l), 1, 0, 0} a.nodes[l] = make([]node, 0) } @@ -241,7 +312,7 @@ func (a *btAlloc) traverseDfs() { if len(a.sons[l]) < 2 { panic("invalid btree allocation markup") } - a.cursors[l] = cur{uint64(l), 1, 0, 0} + a.cursors[l] = markupCursor{uint64(l), 1, 0, 0} a.nodes[l] = make([]node, 0) } @@ -258,6 +329,7 @@ func (a *btAlloc) traverseDfs() { // -- no bros -> shift cursor (tricky) if di > a.K { a.N = di - 1 // actually filled node count + fmt.Printf("ncount=%d ∂%.5f\n", a.N, float64(a.N-a.K)/float64(a.N)) break } @@ -356,13 +428,14 @@ func (a *btAlloc) traverseDfs() { } } +// deprecated func (a *btAlloc) traverse() { var sum uint64 for l := 0; l < len(a.sons)-1; l++ { if len(a.sons[l]) < 2 { panic("invalid btree allocation markup") } - a.cursors[l] = cur{uint64(l), 1, 0, 0} + a.cursors[l] = markupCursor{uint64(l), 1, 0, 0} for i := 0; i < len(a.sons[l]); i += 2 { sum += a.sons[l][i] * a.sons[l][i+1] @@ -444,6 +517,7 @@ func (a *btAlloc) traverse() { } } +// deprecated func (a *btAlloc) fetchByDi(i uint64) (uint64, bool) { if int(i) >= len(a.data) { return 0, true @@ -451,19 +525,34 @@ func (a *btAlloc) fetchByDi(i uint64) (uint64, bool) { return a.data[i], false } -func binsearch(a []node, x uint64) uint64 { - l, r := uint64(0), uint64(len(a)) +func (a *btAlloc) bsKey(x []byte, l, r uint64) (*Cursor, error) { + var exit bool + var di uint64 for l < r { - mid := (l + r) / 2 - if a[mid].d < x { - l = mid + 1 - } else { - r = mid + m := (l + r) >> 1 + + mk, value, err := a.dataLookup(di) + a.naccess++ + + cmp := bytes.Compare(mk, x) + switch { + case err != nil: + break + case cmp == 0: + return newCursor(context.TODO(), mk, value, m), nil + case cmp == -1: + if exit { + break + } + l = m + 1 + default: + r = m } } - return l + return nil, fmt.Errorf("not found") } +// deprecated func (a *btAlloc) bs(i, x, l, r uint64, direct bool) (uint64, uint64, bool) { var exit bool var di uint64 @@ -476,7 +565,12 @@ func (a *btAlloc) bs(i, x, l, r uint64, direct bool) (uint64, uint64, bool) { switch direct { case true: - di = a.data[m] + if m >= uint64(len(a.data)) { + di = a.data[a.K-1] + exit = true + } else { + di = a.data[m] + } case false: di = a.nodes[i][m].d } @@ -515,7 +609,7 @@ func (a *btAlloc) bs(i, x, l, r uint64, direct bool) (uint64, uint64, bool) { return l, r, false } -func (a *btAlloc) bsNode(i, x, l, r uint64, direct bool) (*node, uint64) { +func (a *btAlloc) bsNode(i, l, r uint64, x []byte) (*node, uint64, []byte) { var exit bool var di, lm uint64 n := new(node) @@ -528,35 +622,27 @@ func (a *btAlloc) bsNode(i, x, l, r uint64, direct bool) (*node, uint64) { } lm = m - switch direct { - case true: - di = a.data[m] - case false: - di = a.nodes[i][m].d - n = &a.nodes[i][m] - } + di = a.nodes[i][m].d + n = &a.nodes[i][m] - mkey, nf := a.fetchByDi(di) a.naccess++ + + mk, value, err := a.dataLookup(di) + cmp := bytes.Compare(mk, x) switch { - case nf: + case err != nil: + fmt.Printf("err at switch %v\n", err) break - case mkey == x: - return n, m - case mkey < x: - //if exit { - // break - //} + case cmp == 0: + return n, m, value + case cmp < 0: if m+1 == r { - return n, m + return n, m, nil } l = m default: - //if exit { - // break - //} if m == l { - return n, m + return n, m, nil } r = m } @@ -564,39 +650,36 @@ func 
(a *btAlloc) bsNode(i, x, l, r uint64, direct bool) (*node, uint64) { break } } - return nil, lm -} - -type pt struct { - l, n uint64 + return nil, lm, nil } -func (a *btAlloc) findNode(ik uint64) *node { +func (a *btAlloc) seek(ik []byte) (*Cursor, error) { var L, m uint64 R := uint64(len(a.nodes[0]) - 1) - lhn := new(node) + ln := new(node) + var val []byte for l, level := range a.nodes { - lhn, m = a.bsNode(uint64(l), ik, L, R, false) - if lhn == nil { + ln, m, val = a.bsNode(uint64(l), L, R, ik) + if ln == nil { L = 0 fmt.Printf("found nil key %d lvl=%d naccess=%d\n", level[m].d, l, a.naccess) break } - k := lhn.d - //k, found := a.fetchByDi(lhn.d) - if k > ik { - if lhn.fc > 0 { - L = lhn.fc - 1 + switch bytes.Compare(ln.key, ik) { // k.compare(ik) { + case 1: + if ln.fc > 0 { + L = ln.fc - 1 } else { L = 0 } - } else if k == ik { - fmt.Printf("found key %d naccess=%d\n", level[m].d, a.naccess) + case 0: + fmt.Printf("found key %+v = %v naccess=%d\n", ik, val /*level[m].d,*/, a.naccess) //return true - break - } else { + return newCursor(context.TODO(), ln.key, val, ln.d), nil + //break + default: if m < uint64(len(level)) { R = level[m+1].fc //R = level[m].fc @@ -604,30 +687,35 @@ func (a *btAlloc) findNode(ik uint64) *node { R = uint64(len(a.nodes[l+1]) - 1) } } - fmt.Printf("range={%+v} (%d, %d) L=%d naccess=%d\n", lhn, L, R, l, a.naccess) + fmt.Printf("range={%+v} (%d, %d) L=%d naccess=%d\n", ln, L, R, l, a.naccess) } - if ik < lhn.d { + switch bytes.Compare(ik, ln.key) { + case -1: L = 0 - } else if ik == lhn.d { - fmt.Printf("last found key %d naccess=%d\n", lhn.d, a.naccess) - return nil - } else { - L = lhn.d + case 0: + fmt.Printf("last found key %d naccess=%d\n", ln.d, a.naccess) + return newCursor(context.TODO(), ln.key, val, ln.d), nil + case 1: + L = ln.d } - a.naccess = 0 - mk, _, found := a.bs(a.d-1, ik, L, a.nodes[a.d-1][m+1].d, true) - if found { + a.naccess = 0 // reset count before actually go to storage + cursor, err := a.bsKey(ik, L, a.nodes[a.d-1][m+1].d) + if err != nil { //if trace { - fmt.Printf("last found key %d naccess=%d\n", mk, a.naccess) + fmt.Printf("key %+v not found\n", ik) //} //return true + } else { + fmt.Printf("last found key %+v naccess=%d [%v]\n", cursor.key, a.naccess, err) + return cursor, nil } - return nil + return nil, fmt.Errorf("key not found") } +// deprecated func (a *btAlloc) lookup(ik uint64) bool { trace, direct, found := true, false, false mk, l, r := uint64(0), uint64(0), uint64(len(a.nodes[0])-1) @@ -671,6 +759,7 @@ func (a *btAlloc) lookup(ik uint64) bool { return false } +// deprecated func (a *btAlloc) search(ik uint64) bool { l, r := uint64(0), uint64(len(a.nodes[0])) lr, hr := uint64(0), a.N @@ -749,14 +838,345 @@ func (a *btAlloc) search(ik uint64) bool { func (a *btAlloc) printSearchMx() { for i, n := range a.nodes { fmt.Printf("D%d |%d| ", i, len(n)) - for _, s := range n { + for j, s := range n { fmt.Printf("%d ", s.d) + if s.d >= a.K { + break + } + + kb, v, err := a.dataLookup(s.d) + if err != nil { + fmt.Printf("d %d not found %v\n", s.d, err) + } + a.nodes[i][j].key = common.Copy(kb) + a.nodes[i][j].val = common.Copy(v) } fmt.Printf("\n") } } -func OpenBtreeIndex(indexPath string) (*BtIndex, error) { +// BtIndexReader encapsulates Hash128 to allow concurrent access to Index +type BtIndexReader struct { + index *BtIndex +} + +func NewBtIndexReader(index *BtIndex) *BtIndexReader { + return &BtIndexReader{ + index: index, + } +} + +// Lookup wraps index Lookup +func (r *BtIndexReader) Lookup(key []byte) uint64 { + if 
r.index != nil { + return r.index.Lookup(key) + } + return 0 +} + +func (r *BtIndexReader) Lookup2(key1, key2 []byte) uint64 { + fk := make([]byte, 52) + copy(fk[:length.Addr], key1) + copy(fk[length.Addr:], key2) + + if r.index != nil { + return r.index.Lookup(fk) + } + return 0 +} + +func (r *BtIndexReader) Seek(x []byte) (*Cursor, error) { + if r.index != nil { + cursor, err := r.index.alloc.seek(x) + if err != nil { + return nil, err + } + cursor.ix = r.index + return cursor, nil + } + return nil, fmt.Errorf("seek has been failed") +} + +func (r *BtIndexReader) Empty() bool { + return r.index.Empty() +} + +type BtIndexWriter struct { + built bool + lvl log.Lvl + maxOffset uint64 + prevOffset uint64 + delta uint64 + minDelta uint64 + batchSizeLimit uint64 + indexW *bufio.Writer + indexF *os.File + offsetEf *eliasfano32.EliasFano // Elias Fano instance for encoding the offsets + bucketCollector *etl.Collector // Collector that sorts by buckets + indexFileName string + indexFile string + tmpDir string + salt uint32 // Murmur3 hash used for converting keys to 64-bit values and assigning to buckets + keyBuf []byte + numBuf []byte + keyCount uint64 + keySize int + etlBufLimit datasize.ByteSize + bytesPerRec int + + // fot batch processing + //keys []uint64 + //vals []uint64 +} + +type BtIndexWriterArgs struct { + // Whether two level index needs to be built, where perfect hash map points to an enumeration, and enumeration points to offsets + // if Enum=false: can have unsorted and duplicated values + // if Enum=true: must have sorted values (can have duplicates) - monotonically growing sequence + Enums bool // todo only support true mode + + IndexFile string // File name where the index and the minimal perfect hash function will be written to + TmpDir string + //StartSeed []uint64 // For each level of recursive split, the hash seed (salt) used for that level - need to be generated randomly and be large enough to accomodate all the levels + KeyCount int + EtlBufLimit datasize.ByteSize + Salt uint32 // Hash seed (salt) for the hash function used for allocating the initial buckets - need to be generated randomly +} + +const BtreeLogPrefix = "btree" + +// NewBtIndexWriter creates a new BtIndexWriter instance with given number of keys +// Typical bucket size is 100 - 2048, larger bucket sizes result in smaller representations of hash functions, at a cost of slower access +// salt parameters is used to randomise the hash function construction, to ensure that different Erigon instances (nodes) +// are likely to use different hash function, to collision attacks are unlikely to slow down any meaningful number of nodes at the same time +func NewBtIndexWriter(args BtIndexWriterArgs) (*BtIndexWriter, error) { + btw := &BtIndexWriter{} + btw.salt = args.Salt + if btw.salt == 0 { + seedBytes := make([]byte, 4) + if _, err := rand.Read(seedBytes); err != nil { + return nil, err + } + btw.salt = binary.BigEndian.Uint32(seedBytes) + } + btw.tmpDir = args.TmpDir + btw.indexFile = args.IndexFile + _, fname := filepath.Split(btw.indexFile) + btw.indexFileName = fname + //btw.baseDataID = args.BaseDataID + btw.etlBufLimit = args.EtlBufLimit + if btw.etlBufLimit == 0 { + btw.etlBufLimit = etl.BufferOptimalSize + } + btw.bucketCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) + btw.bucketCollector.LogLvl(log.LvlDebug) + //btw.offsetCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) + 
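	// Rough usage sketch, mirroring Test_InitBtreeIndex above (file name, key count and
	// M are illustrative; error handling elided):
	//
	//	iw, _ := NewBtIndexWriter(BtIndexWriterArgs{Enums: true, IndexFile: "100k.bt", TmpDir: tmp, KeyCount: 100})
	//	for i := 0; i < 100; i++ {
	//		_ = iw.AddKey(key, uint64(i)) // fixed-size key plus the offset stored for it
	//	}
	//	_ = iw.Build()                        // flushes the ETL collector into the index file
	//	bt, _ := OpenBtreeIndex("100k.bt", 4) // M controls the fan-out of the in-memory markup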
//btw.offsetCollector.LogLvl(log.LvlDebug) + + btw.maxOffset = 0 + return btw, nil +} + +// loadFuncBucket is required to satisfy the type etl.LoadFunc type, to use with collector.Load +func (btw *BtIndexWriter) loadFuncBucket(k, v []byte, _ etl.CurrentTableReader, _ etl.LoadNextFunc) error { + // k is the BigEndian encoding of the bucket number, and the v is the key that is assigned into that bucket + //if uint64(len(btw.vals)) >= btw.batchSizeLimit { + // if err := btw.drainBatch(); err != nil { + // return err + // } + //} + + if _, err := btw.indexW.Write(k); err != nil { + return err + } + if _, err := btw.indexW.Write(v[8-btw.bytesPerRec:]); err != nil { + return err + } + + //btw.keys = append(btw.keys, binary.BigEndian.Uint64(k), binary.BigEndian.Uint64(k[8:])) + //btw.vals = append(btw.vals, binary.BigEndian.Uint64(v)) + return nil +} + +// +//func (rs *BtIndexWriter) drainBatch() error { +// // Extend rs.bucketSizeAcc to accomodate current bucket index + 1 +// //for len(rs.bucketSizeAcc) <= int(rs.currentBucketIdx)+1 { +// // rs.bucketSizeAcc = append(rs.bucketSizeAcc, rs.bucketSizeAcc[len(rs.bucketSizeAcc)-1]) +// //} +// //rs.bucketSizeAcc[int(rs.currentBucketIdx)+1] += uint64(len(rs.currentBucket)) +// //// Sets of size 0 and 1 are not further processed, just write them to index +// //if len(rs.currentBucket) > 1 { +// // for i, key := range rs.currentBucket[1:] { +// // if key == rs.currentBucket[i] { +// // rs.collision = true +// // return fmt.Errorf("%w: %x", ErrCollision, key) +// // } +// // } +// // bitPos := rs.gr.bitCount +// // if rs.buffer == nil { +// // rs.buffer = make([]uint64, len(rs.currentBucket)) +// // rs.offsetBuffer = make([]uint64, len(rs.currentBucketOffs)) +// // } else { +// // for len(rs.buffer) < len(rs.currentBucket) { +// // rs.buffer = append(rs.buffer, 0) +// // rs.offsetBuffer = append(rs.offsetBuffer, 0) +// // } +// // } +// // unary, err := rs.recsplit(0 /* level */, rs.currentBucket, rs.currentBucketOffs, nil /* unary */) +// // if err != nil { +// // return err +// // } +// // rs.gr.appendUnaryAll(unary) +// // if rs.trace { +// // fmt.Printf("recsplitBucket(%d, %d, bitsize = %d)\n", rs.currentBucketIdx, len(rs.currentBucket), rs.gr.bitCount-bitPos) +// // } +// //} else { +// var j int +// for _, offset := range rs.vals { +// binary.BigEndian.PutUint64(rs.numBuf[:], offset) +// rs.indexW.Write(rs.keys[j]) +// if _, err := rs.indexW.Write(rs.numBuf[8-rs.bytesPerRec:]); err != nil { +// return err +// } +// } +// //} +// //// Extend rs.bucketPosAcc to accomodate current bucket index + 1 +// //for len(rs.bucketPosAcc) <= int(rs.currentBucketIdx)+1 { +// // rs.bucketPosAcc = append(rs.bucketPosAcc, rs.bucketPosAcc[len(rs.bucketPosAcc)-1]) +// //} +// //rs.bucketPosAcc[int(rs.currentBucketIdx)+1] = uint64(rs.gr.Bits()) +// rs.keys = rs.keys[:0] +// rs.vals = rs.vals[:0] +// return nil +//} + +// Build has to be called after all the keys have been added, and it initiates the process +// of building the perfect hash function and writing index into a file +func (btw *BtIndexWriter) Build() error { + tmpIdxFilePath := btw.indexFile + ".tmp" + + if btw.built { + return fmt.Errorf("already built") + } + //if btw.keysAdded != btw.keyCount { + // return fmt.Errorf("expected keys %d, got %d", btw.keyCount, btw.keysAdded) + //} + var err error + if btw.indexF, err = os.Create(tmpIdxFilePath); err != nil { + return fmt.Errorf("create index file %s: %w", btw.indexFile, err) + } + defer btw.indexF.Sync() + defer btw.indexF.Close() + btw.indexW = 
bufio.NewWriterSize(btw.indexF, etl.BufIOSize) + defer btw.indexW.Flush() + // Write minimal app-specific dataID in this index file + //binary.BigEndian.PutUint64(btw.numBuf[:], btw.baseDataID) + //if _, err = btw.indexW.Write(btw.numBuf[:]); err != nil { + // return fmt.Errorf("write baseDataID: %w", err) + //} + + // Write number of keys + binary.BigEndian.PutUint64(btw.numBuf[:], btw.keyCount) + if _, err = btw.indexW.Write(btw.numBuf[:]); err != nil { + return fmt.Errorf("write number of keys: %w", err) + } + // Write number of bytes per index record + btw.bytesPerRec = (bits.Len64(btw.maxOffset) + 7) / 8 + if err = btw.indexW.WriteByte(byte(btw.bytesPerRec)); err != nil { + return fmt.Errorf("write bytes per record: %w", err) + } + + binary.BigEndian.PutUint32(btw.numBuf[:], btw.salt) + if _, err := btw.indexW.Write(btw.numBuf[:4]); err != nil { + return fmt.Errorf("writing salt: %w", err) + } + + defer btw.bucketCollector.Close() + log.Log(btw.lvl, "[index] calculating", "file", btw.indexFileName) + if err := btw.bucketCollector.Load(nil, "", btw.loadFuncBucket, etl.TransformArgs{}); err != nil { + return err + } + + //if ASSERT { + // btw.indexW.Flush() + // btw.indexF.Seek(0, 0) + // b, _ := io.ReadAll(btw.indexF) + // if len(b) != 9+int(btw.keysAdded)*btw.bytesPerRec { + // panic(fmt.Errorf("expected: %d, got: %d; btw.keysAdded=%d, btw.bytesPerRec=%d, %s", 9+int(btw.keysAdded)*btw.bytesPerRec, len(b), btw.keysAdded, btw.bytesPerRec, btw.indexFile)) + // } + //} + + log.Log(btw.lvl, "[index] write", "file", btw.indexFileName) + btw.built = true + + _ = btw.indexW.Flush() + _ = btw.indexF.Sync() + _ = btw.indexF.Close() + _ = os.Rename(tmpIdxFilePath, btw.indexFile) + return nil +} + +func (btw *BtIndexWriter) Close() { + if btw.indexF != nil { + btw.indexF.Close() + } + if btw.bucketCollector != nil { + btw.bucketCollector.Close() + } + //if btw.offsetCollector != nil { + // btw.offsetCollector.Close() + //} +} + +func (btw *BtIndexWriter) Add(key, value []byte) error { + +} + +func (btw *BtIndexWriter) AddKey(key []byte, offset uint64) error { + if btw.built { + return fmt.Errorf("cannot add keys after perfect hash function had been built") + } + if len(key) != btw.keySize { + return fmt.Errorf("invalid key size %d while expected %d", len(key), btw.keySize) + } + + binary.BigEndian.PutUint64(btw.numBuf[:], offset) + if offset > btw.maxOffset { + btw.maxOffset = offset + } + if btw.keyCount > 0 { + delta := offset - btw.prevOffset + if btw.keyCount == 1 || delta < btw.minDelta { + btw.minDelta = delta + } + } + + if err := btw.bucketCollector.Collect(key[:], btw.numBuf[:]); err != nil { + return err + } + btw.keyCount++ + btw.prevOffset = offset + return nil +} + +type BtIndex struct { + alloc *btAlloc + mmapWin *[mmap.MaxMapSize]byte + mmapUnix []byte + data []byte + file *os.File + size int64 + modTime time.Time + filePath string + keyCount uint64 + keySize int + baseDataID uint64 + bytesPerRec int + dataoffset uint64 +} + +func OpenBtreeIndex(indexPath string, M uint64) (*BtIndex, error) { s, err := os.Stat(indexPath) if err != nil { return nil, err @@ -766,7 +1186,6 @@ func OpenBtreeIndex(indexPath string) (*BtIndex, error) { filePath: indexPath, size: s.Size(), modTime: s.ModTime(), - //idx: btree.NewG[uint64](32, commitmentItemLess), } idx.file, err = os.Open(indexPath) @@ -779,15 +1198,54 @@ func OpenBtreeIndex(indexPath string) (*BtIndex, error) { } idx.data = idx.mmapUnix[:idx.size] // Read number of keys and bytes per record - idx.baseDataID = 
binary.BigEndian.Uint64(idx.data[:8]) - idx.keyCount = binary.BigEndian.Uint64(idx.data[8:16]) + pos := 8 + idx.keyCount = binary.BigEndian.Uint64(idx.data[:pos]) + //idx.baseDataID = binary.BigEndian.Uint64(idx.data[pos:8]) + idx.bytesPerRec = int(binary.BigEndian.Uint16(idx.data[pos:])) + pos += 2 + + idx.keySize = int(binary.BigEndian.Uint16(idx.data[pos:])) + pos += 2 + + offset := int(idx.keyCount)*idx.bytesPerRec + (idx.keySize * int(idx.keyCount)) + if offset < 0 { + return nil, fmt.Errorf("offset is: %d which is below zero, the file: %s is broken", offset, indexPath) + } + + //p := (*[]byte)(unsafe.Pointer(&idx.data[pos])) + //l := int(idx.keyCount)*idx.bytesPerRec + (16 * int(idx.keyCount)) + //idx.alloc.data = p[:l] + + idx.alloc = newBtAlloc(idx.keyCount, M) + idx.alloc.dataLookup = idx.dataLookup + idx.dataoffset = uint64(pos) + idx.alloc.traverseDfs() + idx.alloc.printSearchMx() return idx, nil } +func (b *BtIndex) dataLookup(di uint64) ([]byte, []byte, error) { + if b.keyCount <= di { + return nil, nil, fmt.Errorf("ki is greater than key count in index") + } + + p := b.dataoffset + di*uint64(b.bytesPerRec) + uint64(b.keySize)*di + if uint64(len(b.data)) < p+uint64(b.keySize)+uint64(b.bytesPerRec) { + return nil, nil, fmt.Errorf("data lookup gone too far (%d after %d)", p+16+uint64(b.bytesPerRec)-uint64(len(b.data)), len(b.data)) + } + key := b.data[p : p+uint64(b.keySize)] + p += uint64(b.keySize) + vo := b.data[p : p+uint64(b.bytesPerRec)] + + b.data[vo:] + return key, val, nil +} + func (b *BtIndex) Size() int64 { return b.size } func (b *BtIndex) ModTime() time.Time { return b.modTime } +// Deprecated func (b *BtIndex) BaseDataID() uint64 { return b.baseDataID } func (b *BtIndex) FilePath() string { return b.filePath } @@ -811,9 +1269,12 @@ func (b *BtIndex) Close() error { return nil } -func (b *BtIndex) Lookup(bucketHash, fingerprint uint64) uint64 { - //TODO implement me - panic("implement me") +func (b *BtIndex) Lookup(key []byte) uint64 { + cursor, err := b.alloc.seek(key) + if err != nil { + panic(err) + } + return binary.BigEndian.Uint64(cursor.value) } func (b *BtIndex) OrdinalLookup(i uint64) uint64 { @@ -826,11 +1287,6 @@ func (b *BtIndex) ExtractOffsets() map[uint64]uint64 { panic("implement me") } -func (b *BtIndex) RewriteWithOffsets(w *bufio.Writer, m map[uint64]uint64) error { - //TODO implement me - panic("implement me") -} - func (b *BtIndex) DisableReadAhead() { //TODO implement me panic("implement me") From 0304652ecafef7bbe63963fe06ca9fdcbc46b9c7 Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 3 Feb 2023 13:04:17 +0000 Subject: [PATCH 03/54] replayed merge/collate split by two independent mergers --- state/aggregator.go | 279 +++++++++++++++++---------------- state/aggregator_bench_test.go | 36 +---- state/aggregator_test.go | 96 ++++++------ state/domain.go | 78 +++++---- state/domain_committed.go | 3 - state/merge.go | 79 ++++------ 6 files changed, 275 insertions(+), 296 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index c817f1b84..a42e48411 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -55,7 +55,7 @@ type Aggregator struct { txNum uint64 seekTxNum uint64 blockNum uint64 - commitFn func(txNum uint64) error + stepDoneNotice chan [length.Hash]byte rwTx kv.RwTx stats FilesStats tmpdir string @@ -67,7 +67,7 @@ func NewAggregator( aggregationStep uint64, ) (*Aggregator, error) { - a := &Aggregator{aggregationStep: aggregationStep, tmpdir: tmpdir} + a := &Aggregator{aggregationStep: aggregationStep, tmpdir: tmpdir, 
stepDoneNotice: make(chan [length.Hash]byte, 1)} closeAgg := true defer func() { @@ -135,6 +135,9 @@ func (a *Aggregator) GetAndResetStats() DomainStats { } func (a *Aggregator) Close() { + if a.stepDoneNotice != nil { + close(a.stepDoneNotice) + } if a.accounts != nil { a.accounts.Close() } @@ -244,22 +247,19 @@ func (a *Aggregator) SeekCommitment() (txNum uint64, err error) { } func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { - defer func(t time.Time) { - log.Info("[snapshots] aggregation step is done", "step", step, "took", time.Since(t)) - }(time.Now()) - var ( logEvery = time.NewTicker(time.Second * 30) wg sync.WaitGroup errCh = make(chan error, 8) - //maxSpan = StepsInBiggestFile * a.aggregationStep - txFrom = step * a.aggregationStep - txTo = (step + 1) * a.aggregationStep - //workers = 1 + maxSpan = StepsInBiggestFile * a.aggregationStep + txFrom = step * a.aggregationStep + txTo = (step + 1) * a.aggregationStep + workers = 1 ) + defer logEvery.Stop() - for _, d := range []*Domain{a.accounts, a.storage, a.code, a.commitment.Domain} { + for i, d := range []*Domain{a.accounts, a.storage, a.code, a.commitment.Domain} { wg.Add(1) collation, err := d.collate(ctx, step, txFrom, txTo, d.tx, logEvery) @@ -271,10 +271,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { go func(wg *sync.WaitGroup, d *Domain, collation Collation) { defer wg.Done() - defer func(t time.Time) { - log.Info("[snapshots] domain collate-build is done", "took", time.Since(t), "domain", d.filenameBase) - }(time.Now()) - + start := time.Now() sf, err := d.buildFiles(ctx, step, collation) collation.Close() if err != nil { @@ -284,8 +281,14 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { } d.integrateFiles(sf, step*a.aggregationStep, (step+1)*a.aggregationStep) + d.stats.LastFileBuildingTook = time.Since(start) }(&wg, d, collation) + if i != 3 { // do not warmup commitment domain + if err := d.warmup(txFrom, d.aggregationStep/10, d.tx); err != nil { + return fmt.Errorf("warmup %q domain failed: %w", d.filenameBase, err) + } + } if err := d.prune(ctx, step, txFrom, txTo, math.MaxUint64, logEvery); err != nil { return err } @@ -301,9 +304,6 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { go func(wg *sync.WaitGroup, d *InvertedIndex, tx kv.Tx) { defer wg.Done() - defer func(t time.Time) { - log.Info("[snapshots] index collate-build is done", "took", time.Since(t), "domain", d.filenameBase) - }(time.Now()) sf, err := d.buildFiles(ctx, step, collation) if err != nil { @@ -312,6 +312,12 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { return } d.integrateFiles(sf, step*a.aggregationStep, (step+1)*a.aggregationStep) + + mm := d.endTxNumMinimax() + if err := d.mergeRangesUpTo(ctx, mm, maxSpan, workers); err != nil { + errCh <- err + return + } }(&wg, d, d.tx) if err := d.prune(ctx, txFrom, txTo, math.MaxUint64, logEvery); err != nil { @@ -329,6 +335,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { return fmt.Errorf("domain collate-build failed: %w", err) } + // TODO questionable ac := a.MakeContext() defer ac.Close() @@ -347,6 +354,8 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { func (a *Aggregator) mergeLoopStep(ctx context.Context, maxEndTxNum uint64, workers int) (somethingDone bool, err error) { closeAll := true + mergeStartedAt := time.Now() + maxSpan := a.aggregationStep * StepsInBiggestFile r := a.findMergeRange(maxEndTxNum, maxSpan) 
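	// Roughly: findMergeRange picks, per domain, the [from, to) tx range (bounded by
	// maxSpan) whose files can be merged; staticFilesInRange then collects the files
	// covering that range, mergeFiles rewrites them into one file per domain, and
	// integrateMergedFiles swaps the result into the live file lists below.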
if !r.any() { @@ -374,30 +383,68 @@ func (a *Aggregator) mergeLoopStep(ctx context.Context, maxEndTxNum uint64, work }() a.integrateMergedFiles(outs, in) closeAll = false + + var clo, chi, plo, phi, blo, bhi time.Duration + clo, plo, blo = time.Hour*99, time.Hour*99, time.Hour*99 + for _, s := range []DomainStats{a.accounts.stats, a.code.stats, a.storage.stats} { + c := s.LastCollationTook + p := s.LastPruneTook + b := s.LastFileBuildingTook + + if c < clo { + clo = c + } + if c > chi { + chi = c + } + if p < plo { + plo = p + } + if p > phi { + phi = p + } + if b < blo { + blo = b + } + if b > bhi { + bhi = b + } + } + + log.Info("[stat] finished merge details", + // "step", step, + // "range", fmt.Sprintf("%.2fM-%.2fM", float64(txFrom)/10e5, float64(txTo)/10e5), + "upto_tx", maxEndTxNum, "merge_took", time.Since(mergeStartedAt), + "step_took", time.Since(stepStartedAt), + "collate_min", clo, "collate_max", chi, + "prune_min", plo, "prune_max", phi, + "files_build_min", blo, "files_build_max", bhi) + + return nil return true, nil } type Ranges struct { - accounts DomainRanges - storage DomainRanges - code DomainRanges - commitment DomainRanges - logTopicsEndTxNum uint64 - logAddrsEndTxNum uint64 - logTopicsStartTxNum uint64 - logAddrsStartTxNum uint64 - tracesFromStartTxNum uint64 - tracesFromEndTxNum uint64 - tracesToStartTxNum uint64 - tracesToEndTxNum uint64 - logAddrs bool - logTopics bool - tracesFrom bool - tracesTo bool + accounts DomainRanges + storage DomainRanges + code DomainRanges + commitment DomainRanges + //logTopicsEndTxNum uint64 + //logAddrsEndTxNum uint64 + //logTopicsStartTxNum uint64 + //logAddrsStartTxNum uint64 + //tracesFromStartTxNum uint64 + //tracesFromEndTxNum uint64 + //tracesToStartTxNum uint64 + //tracesToEndTxNum uint64 + //logAddrs bool + //logTopics bool + //tracesFrom bool + //tracesTo bool } func (r Ranges) any() bool { - return r.accounts.any() || r.storage.any() || r.code.any() || r.commitment.any() //|| r.logAddrs || r.logTopics || r.tracesFrom || r.tracesTo + return r.accounts.any() || r.storage.any() || r.code.any() || r.commitment.any() } func (a *Aggregator) findMergeRange(maxEndTxNum, maxSpan uint64) Ranges { @@ -406,11 +453,7 @@ func (a *Aggregator) findMergeRange(maxEndTxNum, maxSpan uint64) Ranges { r.storage = a.storage.findMergeRange(maxEndTxNum, maxSpan) r.code = a.code.findMergeRange(maxEndTxNum, maxSpan) r.commitment = a.commitment.findMergeRange(maxEndTxNum, maxSpan) - r.logAddrs, r.logAddrsStartTxNum, r.logAddrsEndTxNum = a.logAddrs.findMergeRange(maxEndTxNum, maxSpan) - r.logTopics, r.logTopicsStartTxNum, r.logTopicsEndTxNum = a.logTopics.findMergeRange(maxEndTxNum, maxSpan) - r.tracesFrom, r.tracesFromStartTxNum, r.tracesFromEndTxNum = a.tracesFrom.findMergeRange(maxEndTxNum, maxSpan) - r.tracesTo, r.tracesToStartTxNum, r.tracesToEndTxNum = a.tracesTo.findMergeRange(maxEndTxNum, maxSpan) - //log.Info(fmt.Sprintf("findMergeRange(%d, %d)=%+v\n", maxEndTxNum, maxSpan, r)) + log.Info(fmt.Sprintf("findMergeRange(%d, %d)=%+v\n", maxEndTxNum, maxSpan, r)) return r } @@ -427,18 +470,18 @@ type SelectedStaticFiles struct { commitment []*filesItem commitmentIdx []*filesItem commitmentHist []*filesItem - tracesTo []*filesItem - tracesFrom []*filesItem - logTopics []*filesItem - logAddrs []*filesItem - codeI int - storageI int - accountsI int - commitmentI int - logAddrsI int - tracesFromI int - logTopicsI int - tracesToI int + //tracesTo []*filesItem + //tracesFrom []*filesItem + //logTopics []*filesItem + //logAddrs []*filesItem + codeI int + 
storageI int + accountsI int + commitmentI int + //logAddrsI int + //tracesFromI int + //logTopicsI int + //tracesToI int } func (sf SelectedStaticFiles) Close() { @@ -476,18 +519,6 @@ func (a *Aggregator) staticFilesInRange(r Ranges, ac *AggregatorContext) Selecte if r.commitment.any() { sf.commitment, sf.commitmentIdx, sf.commitmentHist, sf.commitmentI = a.commitment.staticFilesInRange(r.commitment, ac.commitment) } - if r.logAddrs { - sf.logAddrs, sf.logAddrsI = a.logAddrs.staticFilesInRange(r.logAddrsStartTxNum, r.logAddrsEndTxNum, ac.logAddrs) - } - if r.logTopics { - sf.logTopics, sf.logTopicsI = a.logTopics.staticFilesInRange(r.logTopicsStartTxNum, r.logTopicsEndTxNum, ac.logTopics) - } - if r.tracesFrom { - sf.tracesFrom, sf.tracesFromI = a.tracesFrom.staticFilesInRange(r.tracesFromStartTxNum, r.tracesFromEndTxNum, ac.tracesFrom) - } - if r.tracesTo { - sf.tracesTo, sf.tracesToI = a.tracesTo.staticFilesInRange(r.tracesToStartTxNum, r.tracesToEndTxNum, ac.tracesTo) - } return sf } @@ -500,10 +531,10 @@ type MergedFiles struct { codeIdx, codeHist *filesItem commitment *filesItem commitmentIdx, commitmentHist *filesItem - logAddrs *filesItem - logTopics *filesItem - tracesFrom *filesItem - tracesTo *filesItem + //logAddrs *filesItem + //logTopics *filesItem + //tracesFrom *filesItem + //tracesTo *filesItem } func (mf MergedFiles) Close() { @@ -526,7 +557,13 @@ func (mf MergedFiles) Close() { } func (a *Aggregator) mergeFiles(ctx context.Context, files SelectedStaticFiles, r Ranges, workers int) (MergedFiles, error) { - defer func(t time.Time) { log.Info("[snapshots] merge", "took", time.Since(t)) }(time.Now()) + started := time.Now() + defer func(t time.Time) { + log.Info("[snapshots] domain files has been merged", + "range", fmt.Sprintf("%d-%d", r.accounts.valuesStartTxNum/a.aggregationStep, r.accounts.valuesEndTxNum/a.aggregationStep), + "took", time.Since(t)) + }(started) + var mf MergedFiles closeFiles := true defer func() { @@ -536,15 +573,15 @@ func (a *Aggregator) mergeFiles(ctx context.Context, files SelectedStaticFiles, }() var ( - errCh = make(chan error, 8) + errCh = make(chan error, 4) wg sync.WaitGroup predicates sync.WaitGroup ) + wg.Add(4) predicates.Add(2) - wg.Add(8) - go func() { + go func(predicates *sync.WaitGroup) { defer wg.Done() defer predicates.Done() var err error @@ -553,8 +590,8 @@ func (a *Aggregator) mergeFiles(ctx context.Context, files SelectedStaticFiles, errCh <- err } } - }() - go func() { + }(&predicates) + go func(predicates *sync.WaitGroup) { defer wg.Done() defer predicates.Done() var err error @@ -563,9 +600,10 @@ func (a *Aggregator) mergeFiles(ctx context.Context, files SelectedStaticFiles, errCh <- err } } - }() + }(&predicates) go func() { defer wg.Done() + var err error if r.code.any() { if mf.code, mf.codeIdx, mf.codeHist, err = a.code.mergeFiles(ctx, files.code, files.codeIdx, files.codeHist, r.code, workers); err != nil { @@ -573,44 +611,8 @@ func (a *Aggregator) mergeFiles(ctx context.Context, files SelectedStaticFiles, } } }() - go func() { - defer wg.Done() - var err error - if r.logAddrs { - if mf.logAddrs, err = a.logAddrs.mergeFiles(ctx, files.logAddrs, r.logAddrsStartTxNum, r.logAddrsEndTxNum, workers); err != nil { - errCh <- err - } - } - }() - go func() { - defer wg.Done() - var err error - if r.logTopics { - if mf.logTopics, err = a.logTopics.mergeFiles(ctx, files.logTopics, r.logTopicsStartTxNum, r.logTopicsEndTxNum, workers); err != nil { - errCh <- err - } - } - }() - go func() { - defer wg.Done() - var err error - if 
r.tracesFrom { - if mf.tracesFrom, err = a.tracesFrom.mergeFiles(ctx, files.tracesFrom, r.tracesFromStartTxNum, r.tracesFromEndTxNum, workers); err != nil { - errCh <- err - } - } - }() - go func() { - defer wg.Done() - var err error - if r.tracesTo { - if mf.tracesTo, err = a.tracesTo.mergeFiles(ctx, files.tracesTo, r.tracesToStartTxNum, r.tracesToEndTxNum, workers); err != nil { - errCh <- err - } - } - }() - go func() { + go func(preidcates *sync.WaitGroup) { defer wg.Done() predicates.Wait() @@ -621,11 +623,11 @@ func (a *Aggregator) mergeFiles(ctx context.Context, files SelectedStaticFiles, errCh <- err } } - }() + + }(&predicates) go func() { wg.Wait() - close(errCh) }() @@ -644,10 +646,6 @@ func (a *Aggregator) integrateMergedFiles(outs SelectedStaticFiles, in MergedFil a.storage.integrateMergedFiles(outs.storage, outs.storageIdx, outs.storageHist, in.storage, in.storageIdx, in.storageHist) a.code.integrateMergedFiles(outs.code, outs.codeIdx, outs.codeHist, in.code, in.codeIdx, in.codeHist) a.commitment.integrateMergedFiles(outs.commitment, outs.commitmentIdx, outs.commitmentHist, in.commitment, in.commitmentIdx, in.commitmentHist) - a.logAddrs.integrateMergedFiles(outs.logAddrs, in.logAddrs) - a.logTopics.integrateMergedFiles(outs.logTopics, in.logTopics) - a.tracesFrom.integrateMergedFiles(outs.tracesFrom, in.tracesFrom) - a.tracesTo.integrateMergedFiles(outs.tracesTo, in.tracesTo) } func (ac *AggregatorContext) ReadAccountData(addr []byte, roTx kv.Tx) ([]byte, error) { @@ -817,27 +815,24 @@ func (a *Aggregator) ReadyToFinishTx() bool { return (a.txNum+1)%a.aggregationStep == 0 && a.seekTxNum < a.txNum } -func (a *Aggregator) SetCommitFn(fn func(txNum uint64) error) { - a.commitFn = fn +// Provides channel which receives commitment hash each time aggregation is occured +func (a *Aggregator) AggregatedRoots() chan [length.Hash]byte { + return a.stepDoneNotice } -func (a *Aggregator) FinishTx() error { +func (a *Aggregator) FinishTx() (err error) { atomic.AddUint64(&a.stats.TxCount, 1) if !a.ReadyToFinishTx() { return nil } - _, err := a.ComputeCommitment(true, false) + rootHash, err := a.ComputeCommitment(true, false) if err != nil { return err } step := a.txNum / a.aggregationStep if step == 0 { - if a.commitFn != nil { - if err := a.commitFn(a.txNum); err != nil { - return fmt.Errorf("aggregator: db commit on finishTx failed, txNum=%d err=%w", a.txNum, err) - } - } + a.notifyAggregated(rootHash) return nil } step-- // Leave one step worth in the DB @@ -850,17 +845,19 @@ func (a *Aggregator) FinishTx() error { return err } - if a.commitFn != nil { - if err := a.commitFn(a.txNum); err != nil { - return err - } - } - - //a.defaultCtx = a.MakeContext() + a.notifyAggregated(rootHash) return nil } +func (a *Aggregator) notifyAggregated(rootHash []byte) { + rh := (*[length.Hash]byte)(rootHash[:]) + select { + case a.stepDoneNotice <- *rh: + default: + } +} + func (a *Aggregator) UpdateAccountData(addr []byte, account []byte) error { a.commitment.TouchPlainKey(addr, account, a.commitment.TouchPlainKeyAccount) return a.accounts.Put(addr, nil, account) @@ -989,10 +986,13 @@ func (a *Aggregator) Flush(ctx context.Context) error { } type FilesStats struct { - TxCount uint64 - FilesCount uint64 - IdxSize uint64 - DataSize uint64 + HistoryReads uint64 + TotalReads uint64 + IdxAccess time.Duration + TxCount uint64 + FilesCount uint64 + IdxSize uint64 + DataSize uint64 } func (a *Aggregator) Stats() FilesStats { @@ -1001,6 +1001,9 @@ func (a *Aggregator) Stats() FilesStats { res.IdxSize = 
stat.IndexSize res.DataSize = stat.DataSize res.FilesCount = stat.FilesCount + res.HistoryReads = stat.HistoryQueries + res.TotalReads = stat.TotalQueries + res.IdxAccess = stat.EfSearchTime return res } diff --git a/state/aggregator_bench_test.go b/state/aggregator_bench_test.go index 830392cd3..19374692f 100644 --- a/state/aggregator_bench_test.go +++ b/state/aggregator_bench_test.go @@ -33,11 +33,10 @@ func BenchmarkAggregator_Processing(b *testing.B) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - //keys := queueKeys(ctx, 42, length.Addr) longKeys := queueKeys(ctx, 64, length.Addr+length.Hash) vals := queueKeys(ctx, 53, length.Hash) - aggStep := uint64(100_000) + aggStep := uint64(100_00) _, db, agg := testDbAndAggregatorBench(b, length.Addr, aggStep) tx, err := db.BeginRw(ctx) @@ -46,28 +45,8 @@ func BenchmarkAggregator_Processing(b *testing.B) { if tx != nil { tx.Rollback() } - if agg != nil { - agg.Close() - } }() - commit := func(txN uint64) (err error) { - err = tx.Commit() - require.NoError(b, err) - if err != nil { - return err - } - - tx = nil - tx, err = db.BeginRw(ctx) - require.NoError(b, err) - if err != nil { - return err - } - agg.SetTx(tx) - return nil - } - agg.SetCommitFn(commit) agg.SetTx(tx) defer agg.StartWrites().FinishWrites() require.NoError(b, err) @@ -76,19 +55,12 @@ func BenchmarkAggregator_Processing(b *testing.B) { b.ReportAllocs() b.ResetTimer() - //keyList := make([][]byte, 20000) + for i := 0; i < b.N; i++ { - //var key []byte - //if i >= len(keyList) { - // pi := i % (len(keyList)) - // key = keyList[pi] - //} else { - // key = <-longKeys - // keyList[i] = key - //} key := <-longKeys val := <-vals - agg.SetTxNum(uint64(i)) + txNum := uint64(i) + agg.SetTxNum(txNum) err := agg.WriteAccountStorage(key[:length.Addr], key[length.Addr:], val) require.NoError(b, err) err = agg.FinishTx() diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 0f664e805..427e8b738 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -51,15 +51,34 @@ func TestAggregator_Merge(t *testing.T) { defer agg.StartWrites().FinishWrites() txs := uint64(10000) + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) // keys are encodings of numbers 1..31 // each key changes value on every txNum which is multiple of the key var maxWrite, otherMaxWrite uint64 for txNum := uint64(1); txNum <= txs; txNum++ { agg.SetTxNum(txNum) + + addr, loc := make([]byte, length.Addr), make([]byte, length.Hash) + + n, err := rnd.Read(addr) + require.NoError(t, err) + require.EqualValues(t, length.Addr, n) + + n, err = rnd.Read(loc) + require.NoError(t, err) + require.EqualValues(t, length.Hash, n) + //keys[txNum-1] = append(addr, loc...) 
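// Illustrative sketch (not part of this patch): how a caller can consume the new
// AggregatedRoots() channel now that the SetCommitFn callback is removed. The
// notify above is non-blocking, so the writer polls the channel after FinishTx and
// commits its RwTx only when an aggregation step actually completed. The names
// db, tx and agg are assumed to match the test helpers used elsewhere in this file.
//
//	roots := agg.AggregatedRoots()
//	for txNum := uint64(1); txNum <= txs; txNum++ {
//		agg.SetTxNum(txNum)
//		// ... write account/storage updates ...
//		if err := agg.FinishTx(); err != nil {
//			return err
//		}
//		select {
//		case rootHash := <-roots:
//			_ = rootHash // e.g. log it before committing
//			if err := tx.Commit(); err != nil {
//				return err
//			}
//			if tx, err = db.BeginRw(ctx); err != nil {
//				return err
//			}
//			agg.SetTx(tx)
//		default:
//		}
//	}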
+ + buf := EncodeAccountBytes(1, uint256.NewInt(0), nil, 0) + err = agg.UpdateAccountData(addr, buf) + require.NoError(t, err) + + err = agg.WriteAccountStorage(addr, loc, []byte{addr[0], loc[0]}) + require.NoError(t, err) + var v [8]byte binary.BigEndian.PutUint64(v[:], txNum) - var err error if txNum%135 == 0 { err = agg.UpdateCommitmentData([]byte("otherroothash"), v[:]) otherMaxWrite = txNum @@ -109,28 +128,13 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { if tx != nil { tx.Rollback() } - if agg != nil { - agg.Close() - } }() agg.SetTx(tx) defer agg.StartWrites().FinishWrites() var latestCommitTxNum uint64 - commit := func(txn uint64) error { - err = agg.Flush(context.Background()) - require.NoError(t, err) - err = tx.Commit() - require.NoError(t, err) - tx, err = db.BeginRw(context.Background()) - require.NoError(t, err) - t.Logf("commit to db txn=%d", txn) - atomic.StoreUint64(&latestCommitTxNum, txn) - agg.SetTx(tx) - return nil - } - agg.SetCommitFn(commit) + rnd := rand.New(rand.NewSource(time.Now().Unix())) txs := (aggStep / 2) * 19 t.Logf("step=%d tx_count=%d", aggStep, txs) @@ -142,7 +146,25 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { agg.SetTxNum(txNum) binary.BigEndian.PutUint64(aux[:], txNum) + addr, loc := make([]byte, length.Addr), make([]byte, length.Hash) + n, err := rnd.Read(addr) + require.NoError(t, err) + require.EqualValues(t, length.Addr, n) + + n, err = rnd.Read(loc) + require.NoError(t, err) + require.EqualValues(t, length.Hash, n) + //keys[txNum-1] = append(addr, loc...) + + buf := EncodeAccountBytes(1, uint256.NewInt(0), nil, 0) + err = agg.UpdateAccountData(addr, buf) + require.NoError(t, err) + + err = agg.WriteAccountStorage(addr, loc, []byte{addr[0], loc[0]}) + require.NoError(t, err) + err = agg.UpdateCommitmentData([]byte("key"), aux[:]) + require.NoError(t, err) maxWrite = txNum require.NoError(t, agg.FinishTx()) @@ -151,8 +173,7 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { require.NoError(t, err) err = tx.Commit() require.NoError(t, err) - agg.Close() - tx, agg = nil, nil + tx = nil // Start another aggregator on same datadir anotherAgg, err := NewAggregator(path, path, aggStep) @@ -203,27 +224,10 @@ func TestAggregator_RestartOnFiles(t *testing.T) { if tx != nil { tx.Rollback() } - if agg != nil { - agg.Close() - } }() agg.SetTx(tx) defer agg.StartWrites().FinishWrites() - var latestCommitTxNum uint64 - commit := func(txn uint64) error { - err = tx.Commit() - require.NoError(t, err) - tx, err = db.BeginRw(context.Background()) - require.NoError(t, err) - t.Logf("commit to db txn=%d", txn) - - atomic.StoreUint64(&latestCommitTxNum, txn) - agg.SetTx(tx) - return nil - } - agg.SetCommitFn(commit) - txs := aggStep * 5 t.Logf("step=%d tx_count=%d", aggStep, txs) @@ -256,10 +260,10 @@ func TestAggregator_RestartOnFiles(t *testing.T) { } err = tx.Commit() + require.NoError(t, err) tx = nil db.Close() db = nil - agg.Close() agg = nil require.NoError(t, os.RemoveAll(filepath.Join(path, "db4"))) @@ -312,7 +316,7 @@ func TestAggregator_RestartOnFiles(t *testing.T) { } func TestAggregator_ReplaceCommittedKeys(t *testing.T) { - aggStep := uint64(1000) + aggStep := uint64(10000) path, db, agg := testDbAndAggregator(t, 0, aggStep) defer db.Close() @@ -324,9 +328,6 @@ func TestAggregator_ReplaceCommittedKeys(t *testing.T) { if tx != nil { tx.Rollback() } - if agg != nil { - agg.Close() - } }() agg.SetTx(tx) defer agg.StartWrites().FinishWrites() @@ -343,9 +344,9 @@ func TestAggregator_ReplaceCommittedKeys(t *testing.T) { agg.SetTx(tx) 
return nil } - agg.SetCommitFn(commit) - txs := aggStep / 2 * 20 + roots := agg.AggregatedRoots() + txs := aggStep / 2 * 50 t.Logf("step=%d tx_count=%d", aggStep, txs) rnd := rand.New(rand.NewSource(0)) @@ -373,6 +374,12 @@ func TestAggregator_ReplaceCommittedKeys(t *testing.T) { err = agg.FinishTx() require.NoError(t, err) + select { + case <-roots: + require.NoError(t, commit(txNum)) + default: + continue + } } half := txs / 2 @@ -403,9 +410,6 @@ func TestAggregator_ReplaceCommittedKeys(t *testing.T) { require.EqualValues(t, key[length.Addr], storedV[1]) } require.NoError(t, err) - - agg.Close() - agg = nil } func Test_EncodeCommitmentState(t *testing.T) { diff --git a/state/domain.go b/state/domain.go index 01a2392e4..a9a48f092 100644 --- a/state/domain.go +++ b/state/domain.go @@ -81,8 +81,13 @@ func filesItemLess(i, j *filesItem) bool { } type DomainStats struct { - MergesCount uint64 + MergesCount uint64 + LastCollationTook time.Duration + LastPruneTook time.Duration + LastFileBuildingTook time.Duration + HistoryQueries uint64 + TotalQueries uint64 EfSearchTime time.Duration DataSize uint64 IndexSize uint64 @@ -91,6 +96,7 @@ type DomainStats struct { func (ds *DomainStats) Accumulate(other DomainStats) { ds.HistoryQueries += other.HistoryQueries + ds.TotalQueries += other.TotalQueries ds.EfSearchTime += other.EfSearchTime ds.IndexSize += other.IndexSize ds.DataSize += other.DataSize @@ -288,6 +294,8 @@ func (d *Domain) Close() { func (dc *DomainContext) get(key []byte, fromTxNum uint64, roTx kv.Tx) ([]byte, bool, error) { //var invertedStep [8]byte + atomic.AddUint64(&dc.d.stats.TotalQueries, 1) + invertedStep := dc.numBuf binary.BigEndian.PutUint64(invertedStep[:], ^(fromTxNum / dc.d.aggregationStep)) keyCursor, err := roTx.CursorDupSort(dc.d.keysTable) @@ -573,6 +581,8 @@ func (dc *DomainContext) IteratePrefix(prefix []byte, it func(k, v []byte)) erro if len(prefix) != dc.d.prefixLen { return fmt.Errorf("wrong prefix length, this %s domain supports prefixLen %d, given [%x]", dc.d.filenameBase, dc.d.prefixLen, prefix) } + atomic.AddUint64(&dc.d.stats.HistoryQueries, 1) + var cp CursorHeap heap.Init(&cp) var k, v []byte @@ -689,6 +699,11 @@ func (c Collation) Close() { // and returns compressors, elias fano, and bitmaps // [txFrom; txTo) func (d *Domain) collate(ctx context.Context, step, txFrom, txTo uint64, roTx kv.Tx, logEvery *time.Ticker) (Collation, error) { + started := time.Now() + defer func() { + d.stats.LastCollationTook = time.Since(started) + }() + hCollation, err := d.History.collate(step, txFrom, txTo, roTx, logEvery) if err != nil { return Collation{}, err @@ -967,14 +982,17 @@ func (d *Domain) integrateFiles(sf StaticFiles, txNumFrom, txNumTo uint64) { // [txFrom; txTo) func (d *Domain) prune(ctx context.Context, step uint64, txFrom, txTo, limit uint64, logEvery *time.Ticker) error { - // It is important to clean up tables in a specific order - // First keysTable, because it is the first one access in the `get` function, i.e. 
if the record is canDelete from there, other tables will not be accessed + start := time.Now() + defer func() { + d.stats.LastPruneTook = time.Since(start) + }() + keysCursor, err := d.tx.RwCursorDupSort(d.keysTable) if err != nil { return fmt.Errorf("%s keys cursor: %w", d.filenameBase, err) } defer keysCursor.Close() - var k, v []byte + var k, v, stepBytes []byte keyMaxSteps := make(map[string]uint64) for k, v, err = keysCursor.First(); err == nil && k != nil; k, v, err = keysCursor.Next() { @@ -985,17 +1003,21 @@ func (d *Domain) prune(ctx context.Context, step uint64, txFrom, txTo, limit uin log.Warn("[snapshots] prune domain cancelled", "name", d.filenameBase, "err", ctx.Err()) return err default: - } - - s := ^binary.BigEndian.Uint64(v) - if maxS, seen := keyMaxSteps[string(k)]; !seen || s > maxS { - keyMaxSteps[string(k)] = s + s := ^binary.BigEndian.Uint64(v) + if maxS, seen := keyMaxSteps[string(k)]; !seen || s > maxS { + keyMaxSteps[string(k)] = s + } + if len(stepBytes) == 0 && step == s { + stepBytes = common.Copy(v) + } } } if err != nil { return fmt.Errorf("iterate of %s keys: %w", d.filenameBase, err) } + // It is important to clean up tables in a specific order + // First keysTable, because it is the first one access in the `get` function, i.e. if the record is deleted from there, other tables will not be accessed for k, v, err = keysCursor.First(); err == nil && k != nil; k, v, err = keysCursor.Next() { select { case <-logEvery.C: @@ -1004,19 +1026,13 @@ func (d *Domain) prune(ctx context.Context, step uint64, txFrom, txTo, limit uin log.Warn("[snapshots] prune domain cancelled", "name", d.filenameBase, "err", ctx.Err()) return err default: - } - - s := ^binary.BigEndian.Uint64(v) - if s == step { - if maxS := keyMaxSteps[string(k)]; maxS <= step { - continue - } - if err = keysCursor.DeleteCurrent(); err != nil { - return fmt.Errorf("clean up %s for [%x]=>[%x]: %w", d.filenameBase, k, v, err) - } - - if bytes.HasPrefix(k, keyCommitmentState) { - fmt.Printf("domain prune key %x [s%d] txn=%d\n", string(k), s, ^binary.BigEndian.Uint64(v)) + if bytes.Equal(stepBytes, v) { + if maxS := keyMaxSteps[string(k)]; maxS <= step { + continue + } + if err = keysCursor.DeleteCurrent(); err != nil { + return fmt.Errorf("clean up %s for [%x]=>[%x]: %w", d.filenameBase, k, v, err) + } } } } @@ -1036,16 +1052,14 @@ func (d *Domain) prune(ctx context.Context, step uint64, txFrom, txTo, limit uin log.Warn("[snapshots] prune domain cancelled", "name", d.filenameBase, "err", ctx.Err()) return err default: - } - s := ^binary.BigEndian.Uint64(k[len(k)-8:]) - if s == step { - if maxS := keyMaxSteps[string(k[:len(k)-8])]; maxS <= step { - continue - } - if err = valsCursor.DeleteCurrent(); err != nil { - return fmt.Errorf("clean up %s for [%x]: %w", d.filenameBase, k, err) + if bytes.Equal(stepBytes, k[len(k)-8:]) { + if maxS := keyMaxSteps[string(k[len(k)-8:])]; maxS <= step { + continue + } + if err = valsCursor.DeleteCurrent(); err != nil { + return fmt.Errorf("clean up %s for [%x]: %w", d.filenameBase, k, err) + } } - //fmt.Printf("domain prune value for %x (invs %x) [s%d]\n", string(k),k[len(k)-8):], s) } } if err != nil { @@ -1133,6 +1147,8 @@ func (dc *DomainContext) readFromFiles(filekey []byte, fromTxNum uint64) ([]byte // historyBeforeTxNum searches history for a value of specified key before txNum // second return value is true if the value is found in the history (even if it is nil) func (dc *DomainContext) historyBeforeTxNum(key []byte, txNum uint64, roTx kv.Tx) ([]byte, bool, error) 
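// Sketch of the encoding the pruning loops above rely on (my reading of the
// surrounding code, not text from this diff): the dupsort value in keysTable,
// and the 8-byte suffix of keys in valsTable, hold the step bitwise-inverted as
// a big-endian uint64, so stepBytes can be matched with a plain byte comparison
// instead of decoding every value. An entry for `step` is deleted only when
// keyMaxSteps shows the key also exists at a newer step, which keeps the latest
// value of every key in the DB.
//
//	var stepEnc [8]byte
//	binary.BigEndian.PutUint64(stepEnc[:], ^step) // what stepBytes ends up holding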
{ + atomic.AddUint64(&dc.d.stats.HistoryQueries, 1) + var search ctxItem search.startTxNum = txNum search.endTxNum = txNum diff --git a/state/domain_committed.go b/state/domain_committed.go index c217dfce4..f2af5ad84 100644 --- a/state/domain_committed.go +++ b/state/domain_committed.go @@ -54,7 +54,6 @@ type DomainCommitted struct { commTree *btree.BTreeG[*CommitmentItem] keccak hash.Hash patriciaTrie *commitment.HexPatriciaHashed - keyReplaceFn ValueMerger // defines logic performed with stored values during files merge branchMerger *commitment.BranchMerger } @@ -69,8 +68,6 @@ func NewCommittedDomain(d *Domain, mode CommitmentMode) *DomainCommitted { } } -func (d *DomainCommitted) SetKeyReplacer(vm ValueMerger) { d.keyReplaceFn = vm } - func (d *DomainCommitted) SetCommitmentMode(m CommitmentMode) { d.mode = m } // TouchPlainKey marks plainKey as updated and applies different fn for different key types diff --git a/state/merge.go b/state/merge.go index 4c39a6ab0..5588789cc 100644 --- a/state/merge.go +++ b/state/merge.go @@ -191,43 +191,40 @@ func (s *staticFilesInRange) Close() { } } -/* // nolint +func (d *Domain) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int) (err error) { + closeAll := true + for rng := d.findMergeRange(maxSpan, maxTxNum); rng.any(); rng = d.findMergeRange(maxTxNum, maxSpan) { + var sfr staticFilesInRange + sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, sfr.startJ = d.staticFilesInRange(rng) + defer func() { + if closeAll { + sfr.Close() + } + }() - func (d *Domain) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int) (err error) { - closeAll := true - for rng := d.findMergeRange(maxSpan, maxTxNum); rng.any(); rng = d.findMergeRange(maxTxNum, maxSpan) { - var sfr staticFilesInRange - sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, sfr.startJ = d.staticFilesInRange(rng) - defer func() { - if closeAll { - sfr.Close() - } - }() - - var mf mergedDomainFiles - if mf.values, mf.index, mf.history, err = d.mergeFiles(ctx, sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, rng, workers); err != nil { - return err + var mf mergedDomainFiles + if mf.values, mf.index, mf.history, err = d.mergeFiles(ctx, sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, rng, workers); err != nil { + return err + } + defer func() { + if closeAll { + mf.Close() } - defer func() { - if closeAll { - mf.Close() - } - }() + }() - //defer func(t time.Time) { log.Info("[snapshots] merge", "took", time.Since(t)) }(time.Now()) - d.integrateMergedFiles(sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, mf.values, mf.index, mf.history) + //defer func(t time.Time) { log.Info("[snapshots] merge", "took", time.Since(t)) }(time.Now()) + d.integrateMergedFiles(sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, mf.values, mf.index, mf.history) - if err := d.deleteFiles(sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles); err != nil { - return err - } + // if err := d.deleteFiles(sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles); err != nil { + // return err + // } - log.Info(fmt.Sprintf("domain files mergedRange[%d, %d) name=%s span=%d \n", rng.valuesStartTxNum, rng.valuesEndTxNum, d.filenameBase, maxSpan)) - } - closeAll = false - return nil + log.Info(fmt.Sprintf("domain files mergedRange[%d, %d) name=%s span=%d \n", rng.valuesStartTxNum, rng.valuesEndTxNum, d.filenameBase, maxSpan)) } -*/ + closeAll = false + return nil +} func (ii *InvertedIndex) findMergeRange(maxEndTxNum, maxSpan uint64) (bool, uint64, uint64) { var minFound bool @@ -254,12 +251,11 
@@ func (ii *InvertedIndex) findMergeRange(maxEndTxNum, maxSpan uint64) (bool, uint return minFound, startTxNum, endTxNum } -/* // nolint func (ii *InvertedIndex) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int) (err error) { closeAll := true for updated, startTx, endTx := ii.findMergeRange(maxSpan, maxTxNum); updated; updated, startTx, endTx = ii.findMergeRange(maxTxNum, maxSpan) { - staticFiles, startJ := ii.staticFilesInRange(startTx, endTx) + staticFiles, _ := ii.staticFilesInRange(startTx, endTx) defer func() { if closeAll { for _, i := range staticFiles { @@ -268,7 +264,6 @@ func (ii *InvertedIndex) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan } } }() - _ = startJ mergedIndex, err := ii.mergeFiles(ctx, staticFiles, startTx, endTx, workers) if err != nil { @@ -281,19 +276,14 @@ func (ii *InvertedIndex) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan } }() - //defer func(t time.Time) { log.Info("[snapshots] merge", "took", time.Since(t)) }(time.Now()) ii.integrateMergedFiles(staticFiles, mergedIndex) - - if err := ii.deleteFiles(staticFiles); err != nil { - return err - } - - log.Info(fmt.Sprintf("domain files mergedRange[%d, %d) name=%s span=%d \n", startTx, endTx, ii.filenameBase, maxSpan)) + // if err := ii.deleteFiles(staticFiles); err != nil { + // return err + // } } closeAll = false return nil } -*/ type HistoryRanges struct { historyStartTxNum uint64 @@ -479,16 +469,13 @@ func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, histor return } var comp *compress.Compressor - //var decomp *compress.Decompressor - var closeItem = true + closeItem := true + defer func() { if closeItem { if comp != nil { comp.Close() } - //if decomp != nil { - // decomp.Close() - //} if indexIn != nil { if indexIn.decompressor != nil { indexIn.decompressor.Close() From 42b62102193dfa8e49772f5dbb0036c805725750 Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 3 Feb 2023 16:40:31 +0000 Subject: [PATCH 04/54] combined compressed values with indexed keys --- state/aggregator.go | 10 +++--- state/aggregator_test.go | 61 +++++++++++++++++++++++-------- state/btree_index.go | 78 ++++++++++++++++++++-------------------- state/merge.go | 8 ++--- 4 files changed, 95 insertions(+), 62 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index a42e48411..4189e300f 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -285,7 +285,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { }(&wg, d, collation) if i != 3 { // do not warmup commitment domain - if err := d.warmup(txFrom, d.aggregationStep/10, d.tx); err != nil { + if err := d.warmup(ctx, txFrom, d.aggregationStep/10, d.tx); err != nil { return fmt.Errorf("warmup %q domain failed: %w", d.filenameBase, err) } } @@ -313,8 +313,11 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { } d.integrateFiles(sf, step*a.aggregationStep, (step+1)*a.aggregationStep) + icx := d.MakeContext() + defer icx.Close() + mm := d.endTxNumMinimax() - if err := d.mergeRangesUpTo(ctx, mm, maxSpan, workers); err != nil { + if err := d.mergeRangesUpTo(ctx, mm, maxSpan, workers, icx); err != nil { errCh <- err return } @@ -415,12 +418,11 @@ func (a *Aggregator) mergeLoopStep(ctx context.Context, maxEndTxNum uint64, work // "step", step, // "range", fmt.Sprintf("%.2fM-%.2fM", float64(txFrom)/10e5, float64(txTo)/10e5), "upto_tx", maxEndTxNum, "merge_took", time.Since(mergeStartedAt), - "step_took", time.Since(stepStartedAt), + // "step_took", 
time.Since(stepStartedAt), "collate_min", clo, "collate_max", chi, "prune_min", plo, "prune_max", phi, "files_build_min", blo, "files_build_max", bhi) - return nil return true, nil } diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 427e8b738..1cc4306dc 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -18,6 +18,7 @@ import ( "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/length" + "github.com/ledgerwatch/erigon-lib/compress" "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/erigon-lib/kv/mdbx" ) @@ -435,12 +436,10 @@ func Test_EncodeCommitmentState(t *testing.T) { func Test_BtreeIndex_Seek(t *testing.T) { tmp := t.TempDir() args := BtIndexWriterArgs{ - Enums: true, IndexFile: path.Join(tmp, "1M.bt"), TmpDir: tmp, KeyCount: 1_000, EtlBufLimit: 0, - Salt: 0, } iw, err := NewBtIndexWriter(args) require.NoError(t, err) @@ -467,7 +466,7 @@ func Test_BtreeIndex_Seek(t *testing.T) { require.NoError(t, iw.Build()) iw.Close() - bt, err := OpenBtreeIndex(args.IndexFile, 4) + bt, err := OpenBtreeIndex(args.IndexFile, "", 4) require.NoError(t, err) require.EqualValues(t, bt.KeyCount(), args.KeyCount) @@ -491,12 +490,10 @@ func Test_BtreeIndex_Seek(t *testing.T) { func Test_InitBtreeIndex(t *testing.T) { tmp := t.TempDir() args := BtIndexWriterArgs{ - Enums: true, - IndexFile: path.Join(tmp, "100k.bt"), - TmpDir: tmp, - KeyCount: 100, - EtlBufLimit: 0, - Salt: 0, + IndexFile: path.Join(tmp, "100k.bt"), + TmpDir: tmp, + KeyCount: 100, + KeySize: 52, } iw, err := NewBtIndexWriter(args) require.NoError(t, err) @@ -505,20 +502,56 @@ func Test_InitBtreeIndex(t *testing.T) { defer os.RemoveAll(tmp) rnd := rand.New(rand.NewSource(0)) - keys := make([]byte, 52) + keys := make([]byte, args.KeySize) + values := make([]byte, 300) + + comp, err := compress.NewCompressor(context.Background(), "cmp", path.Join(tmp, "100k.v2"), tmp, compress.MinPatternScore, 1, log.LvlDebug) + require.NoError(t, err) + for i := 0; i < args.KeyCount; i++ { - n, err := rnd.Read(keys[:52]) - require.EqualValues(t, n, 52) + // n, err := rnd.Read(keys[:52]) + // require.EqualValues(t, n, 52) + // require.NoError(t, err) + + n, err := rnd.Read(values[:rnd.Intn(300)]) require.NoError(t, err) - err = iw.AddKey(keys[:], uint64(i)) + err = comp.AddWord(values[:n]) + require.NoError(t, err) + } + + err = comp.Compress() + require.NoError(t, err) + comp.Close() + + decomp, err := compress.NewDecompressor(path.Join(tmp, "100k.v2")) + require.NoError(t, err) + + getter := decomp.MakeGetter() + getter.Reset(0) + + var pos uint64 + for i := 0; i < args.KeyCount; i++ { + if !getter.HasNext() { + t.Fatalf("not enough values at %d", i) + break + } + pos = getter.Skip() + // getter.Next(values[:0]) + + n, err := rnd.Read(keys[:args.KeySize]) + require.EqualValues(t, n, args.KeySize) + require.NoError(t, err) + + err = iw.AddKey(keys[:], uint64(pos)) require.NoError(t, err) } + decomp.Close() require.NoError(t, iw.Build()) iw.Close() - bt, err := OpenBtreeIndex(args.IndexFile, 4) + bt, err := OpenBtreeIndex(args.IndexFile, path.Join(tmp, "100k.v2"), 4) require.NoError(t, err) require.EqualValues(t, bt.KeyCount(), args.KeyCount) bt.Close() diff --git a/state/btree_index.go b/state/btree_index.go index feff45dc6..47f8a1abe 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -4,7 +4,6 @@ import ( "bufio" "bytes" "context" - "crypto/rand" "encoding/binary" "fmt" "math" @@ -19,9 +18,9 @@ import ( "github.com/ledgerwatch/erigon-lib/common" 
"github.com/ledgerwatch/erigon-lib/common/length" + "github.com/ledgerwatch/erigon-lib/compress" "github.com/ledgerwatch/erigon-lib/etl" "github.com/ledgerwatch/erigon-lib/mmap" - "github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32" ) func logBase(n, base uint64) uint64 { @@ -911,36 +910,23 @@ type BtIndexWriter struct { batchSizeLimit uint64 indexW *bufio.Writer indexF *os.File - offsetEf *eliasfano32.EliasFano // Elias Fano instance for encoding the offsets - bucketCollector *etl.Collector // Collector that sorts by buckets + bucketCollector *etl.Collector // Collector that sorts by buckets indexFileName string indexFile string tmpDir string - salt uint32 // Murmur3 hash used for converting keys to 64-bit values and assigning to buckets - keyBuf []byte - numBuf []byte + numBuf [8]byte keyCount uint64 keySize int etlBufLimit datasize.ByteSize bytesPerRec int - - // fot batch processing - //keys []uint64 - //vals []uint64 } type BtIndexWriterArgs struct { - // Whether two level index needs to be built, where perfect hash map points to an enumeration, and enumeration points to offsets - // if Enum=false: can have unsorted and duplicated values - // if Enum=true: must have sorted values (can have duplicates) - monotonically growing sequence - Enums bool // todo only support true mode - - IndexFile string // File name where the index and the minimal perfect hash function will be written to - TmpDir string - //StartSeed []uint64 // For each level of recursive split, the hash seed (salt) used for that level - need to be generated randomly and be large enough to accomodate all the levels + IndexFile string // File name where the index and the minimal perfect hash function will be written to + TmpDir string KeyCount int + KeySize int EtlBufLimit datasize.ByteSize - Salt uint32 // Hash seed (salt) for the hash function used for allocating the initial buckets - need to be generated randomly } const BtreeLogPrefix = "btree" @@ -951,14 +937,6 @@ const BtreeLogPrefix = "btree" // are likely to use different hash function, to collision attacks are unlikely to slow down any meaningful number of nodes at the same time func NewBtIndexWriter(args BtIndexWriterArgs) (*BtIndexWriter, error) { btw := &BtIndexWriter{} - btw.salt = args.Salt - if btw.salt == 0 { - seedBytes := make([]byte, 4) - if _, err := rand.Read(seedBytes); err != nil { - return nil, err - } - btw.salt = binary.BigEndian.Uint32(seedBytes) - } btw.tmpDir = args.TmpDir btw.indexFile = args.IndexFile _, fname := filepath.Split(btw.indexFile) @@ -968,6 +946,8 @@ func NewBtIndexWriter(args BtIndexWriterArgs) (*BtIndexWriter, error) { if btw.etlBufLimit == 0 { btw.etlBufLimit = etl.BufferOptimalSize } + + btw.keySize = args.KeySize btw.bucketCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) btw.bucketCollector.LogLvl(log.LvlDebug) //btw.offsetCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) @@ -1086,10 +1066,9 @@ func (btw *BtIndexWriter) Build() error { if err = btw.indexW.WriteByte(byte(btw.bytesPerRec)); err != nil { return fmt.Errorf("write bytes per record: %w", err) } - - binary.BigEndian.PutUint32(btw.numBuf[:], btw.salt) - if _, err := btw.indexW.Write(btw.numBuf[:4]); err != nil { - return fmt.Errorf("writing salt: %w", err) + binary.BigEndian.PutUint16(btw.numBuf[:2], uint16(btw.keySize)) + if _, err = btw.indexW.Write(btw.numBuf[:2]); err != nil { + return fmt.Errorf("write number of keys: %w", err) } defer 
btw.bucketCollector.Close() @@ -1129,9 +1108,9 @@ func (btw *BtIndexWriter) Close() { //} } -func (btw *BtIndexWriter) Add(key, value []byte) error { +// func (btw *BtIndexWriter) Add(key, value []byte) error { -} +// } func (btw *BtIndexWriter) AddKey(key []byte, offset uint64) error { if btw.built { @@ -1174,9 +1153,12 @@ type BtIndex struct { baseDataID uint64 bytesPerRec int dataoffset uint64 + + decompressor *compress.Decompressor + getter *compress.Getter } -func OpenBtreeIndex(indexPath string, M uint64) (*BtIndex, error) { +func OpenBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { s, err := os.Stat(indexPath) if err != nil { return nil, err @@ -1201,8 +1183,8 @@ func OpenBtreeIndex(indexPath string, M uint64) (*BtIndex, error) { pos := 8 idx.keyCount = binary.BigEndian.Uint64(idx.data[:pos]) //idx.baseDataID = binary.BigEndian.Uint64(idx.data[pos:8]) - idx.bytesPerRec = int(binary.BigEndian.Uint16(idx.data[pos:])) - pos += 2 + idx.bytesPerRec = int(idx.data[pos]) + pos += 1 idx.keySize = int(binary.BigEndian.Uint16(idx.data[pos:])) pos += 2 @@ -1214,7 +1196,13 @@ func OpenBtreeIndex(indexPath string, M uint64) (*BtIndex, error) { //p := (*[]byte)(unsafe.Pointer(&idx.data[pos])) //l := int(idx.keyCount)*idx.bytesPerRec + (16 * int(idx.keyCount)) - //idx.alloc.data = p[:l] + + idx.decompressor, err = compress.NewDecompressor(dataPath) + if err != nil { + idx.Close() + return nil, err + } + idx.getter = idx.decompressor.MakeGetter() idx.alloc = newBtAlloc(idx.keyCount, M) idx.alloc.dataLookup = idx.dataLookup @@ -1235,9 +1223,19 @@ func (b *BtIndex) dataLookup(di uint64) ([]byte, []byte, error) { } key := b.data[p : p+uint64(b.keySize)] p += uint64(b.keySize) - vo := b.data[p : p+uint64(b.bytesPerRec)] - b.data[vo:] + offt := b.data[p : p+uint64(b.bytesPerRec)] + aux := make([]byte, 8) + copy(aux[8-len(offt):], offt) + + vo := binary.BigEndian.Uint64(aux) + + b.getter.Reset(vo) + var val []byte + if b.getter.HasNext() { + val, _ = b.getter.Next(nil) + } + return key, val, nil } diff --git a/state/merge.go b/state/merge.go index 5588789cc..5a484338b 100644 --- a/state/merge.go +++ b/state/merge.go @@ -192,11 +192,11 @@ func (s *staticFilesInRange) Close() { } // nolint -func (d *Domain) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int) (err error) { +func (d *Domain) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int, dctx *DomainContext) (err error) { closeAll := true for rng := d.findMergeRange(maxSpan, maxTxNum); rng.any(); rng = d.findMergeRange(maxTxNum, maxSpan) { var sfr staticFilesInRange - sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, sfr.startJ = d.staticFilesInRange(rng) + sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, sfr.startJ = d.staticFilesInRange(rng, dctx) defer func() { if closeAll { sfr.Close() @@ -252,10 +252,10 @@ func (ii *InvertedIndex) findMergeRange(maxEndTxNum, maxSpan uint64) (bool, uint } // nolint -func (ii *InvertedIndex) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int) (err error) { +func (ii *InvertedIndex) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int, ictx *InvertedIndexContext) (err error) { closeAll := true for updated, startTx, endTx := ii.findMergeRange(maxSpan, maxTxNum); updated; updated, startTx, endTx = ii.findMergeRange(maxTxNum, maxSpan) { - staticFiles, _ := ii.staticFilesInRange(startTx, endTx) + staticFiles, _ := ii.staticFilesInRange(startTx, endTx, ictx) defer func() { if closeAll { for _, i := range 
staticFiles { From e97d3c3ac8e8837f3a633fa614b2d17c70c95ff5 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 6 Feb 2023 18:57:32 +0000 Subject: [PATCH 05/54] new bunch of fixes for the search --- state/aggregator_test.go | 171 +++++++++++++++++------- state/btree_index.go | 279 ++++++++++++++++++++------------------- 2 files changed, 269 insertions(+), 181 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 1cc4306dc..cb6b6a506 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -435,56 +435,133 @@ func Test_EncodeCommitmentState(t *testing.T) { func Test_BtreeIndex_Seek(t *testing.T) { tmp := t.TempDir() + + keyCount, M := 1000, 16 + dataPath := generateCompressedKV(t, tmp, 52, keyCount) + defer os.RemoveAll(tmp) + + indexPath := path.Join(tmp, filepath.Base(dataPath)+".bti") + err := BuildBtreeIndex(dataPath, indexPath) + require.NoError(t, err) + + bt, err := OpenBtreeIndex(indexPath, dataPath, uint64(M)) + require.NoError(t, err) + require.EqualValues(t, bt.KeyCount(), keyCount) + + idx := NewBtIndexReader(bt) + + keys, err := pivotKeysFromKV(dataPath) + require.NoError(t, err) + + for i := 818; i < len(keys); i++ { + cur, err := idx.Seek(keys[i]) + require.NoErrorf(t, err, "i=%d", i) + require.EqualValues(t, keys[i], cur.key) + require.NotEmptyf(t, cur.Value(), "i=%d", i) + // require.EqualValues(t, uint64(i), cur.Value()) + } + + // for i := 0; i < len(lookafter); i += 5 { + // cur, err := idx.Seek(lookafter[i]) + // require.NoError(t, err) + // //require.EqualValues(t, lookafter[i], cur.key) + // require.EqualValues(t, uint64(i), cur.Value()) + // for j := 0; j < 5; j++ { + // //require.EqualValues(t, lookafter[i+j], idx.Key()) + // require.EqualValues(t, uint64(i+j), cur.Value()) + // cur.Next() + // } + // } + + bt.Close() +} + +func pivotKeysFromKV(dataPath string) ([][]byte, error) { + decomp, err := compress.NewDecompressor(dataPath) + if err != nil { + return nil, err + } + + getter := decomp.MakeGetter() + getter.Reset(0) + + key := make([]byte, 0, 64) + + listing := make([][]byte, 0, 1000) + + for getter.HasNext() { + key, _ := getter.Next(key[:0]) + listing = append(listing, common.Copy(key)) + getter.Skip() + } + decomp.Close() + + return listing, nil +} + +func generateCompressedKV(t *testing.T, tmp string, keySize, keyCount int) string { args := BtIndexWriterArgs{ - IndexFile: path.Join(tmp, "1M.bt"), - TmpDir: tmp, - KeyCount: 1_000, - EtlBufLimit: 0, + IndexFile: path.Join(tmp, "100k.bt"), + TmpDir: tmp, + KeyCount: 12, } + iw, err := NewBtIndexWriter(args) require.NoError(t, err) defer iw.Close() - defer os.RemoveAll(tmp) - rnd := rand.New(rand.NewSource(0)) - keys := make([]byte, 52) - lookafter := make([][]byte, 0) - for i := 0; i < args.KeyCount; i++ { - n, err := rnd.Read(keys[:52]) - require.EqualValues(t, n, 52) + values := make([]byte, 300) + + comp, err := compress.NewCompressor(context.Background(), "cmp", path.Join(tmp, "100k.v2"), tmp, compress.MinPatternScore, 1, log.LvlDebug) + require.NoError(t, err) + + for i := 0; i < keyCount; i++ { + // n, err := rnd.Read(keys[:52]) + // require.EqualValues(t, n, 52) + key := make([]byte, keySize) + binary.BigEndian.PutUint64(key[keySize-8:], uint64(i)) + require.NoError(t, err) + err = comp.AddWord(key[:]) require.NoError(t, err) - err = iw.AddKey(keys[:], uint64(i)) + n, err := rnd.Read(values[:rnd.Intn(300)+1]) require.NoError(t, err) - if i%1000 < 5 { - lookafter = append(lookafter, common.Copy(keys)) - } + err = comp.AddWord(values[:n]) + require.NoError(t, 
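// Condensed sketch of the flow exercised by the test above (not from the diff;
// it assumes a .kv file of alternating key/value words, as produced by
// generateCompressedKV below). At this point in the series Seek reports an
// error for keys that are absent from the file, so only existing keys are looked up.
//
//	dataPath := "some.kv" // compressed key/value word pairs
//	indexPath := dataPath + ".bti"
//	if err := BuildBtreeIndex(dataPath, indexPath); err != nil { // one offset per key word
//		panic(err)
//	}
//	bt, err := OpenBtreeIndex(indexPath, dataPath, 16 /* M: node fanout */)
//	if err != nil {
//		panic(err)
//	}
//	defer bt.Close()
//	cur, err := NewBtIndexReader(bt).Seek(existingKey)
//	if err == nil {
//		_ = cur.Value() // value word that follows the key word in the .kv file
//	}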
err) } - require.NoError(t, iw.Build()) - iw.Close() + err = comp.Compress() + require.NoError(t, err) + comp.Close() - bt, err := OpenBtreeIndex(args.IndexFile, "", 4) + decomp, err := compress.NewDecompressor(path.Join(tmp, "100k.v2")) require.NoError(t, err) - require.EqualValues(t, bt.KeyCount(), args.KeyCount) - idx := NewBtIndexReader(bt) + getter := decomp.MakeGetter() + getter.Reset(0) - for i := 0; i < len(lookafter); i += 5 { - cur, err := idx.Seek(lookafter[i]) - require.NoError(t, err) - //require.EqualValues(t, lookafter[i], cur.key) - require.EqualValues(t, uint64(i), cur.Value()) - for j := 0; j < 5; j++ { - //require.EqualValues(t, lookafter[i+j], idx.Key()) - require.EqualValues(t, uint64(i+j), cur.Value()) - cur.Next() + var pos uint64 + key := make([]byte, keySize) + for i := 0; i < keyCount; i++ { + if !getter.HasNext() { + t.Fatalf("not enough values at %d", i) + break } + + keys, _ := getter.Next(key[:0]) + err = iw.AddKey(keys[:], uint64(pos)) + + pos = getter.Skip() + require.NoError(t, err) } + decomp.Close() - bt.Close() + require.NoError(t, iw.Build()) + iw.Close() + + return decomp.FilePath() } func Test_InitBtreeIndex(t *testing.T) { @@ -492,9 +569,10 @@ func Test_InitBtreeIndex(t *testing.T) { args := BtIndexWriterArgs{ IndexFile: path.Join(tmp, "100k.bt"), TmpDir: tmp, - KeyCount: 100, - KeySize: 52, + KeyCount: 12, } + keySize := 52 + M := uint64(4) iw, err := NewBtIndexWriter(args) require.NoError(t, err) @@ -502,18 +580,20 @@ func Test_InitBtreeIndex(t *testing.T) { defer os.RemoveAll(tmp) rnd := rand.New(rand.NewSource(0)) - keys := make([]byte, args.KeySize) + keys := make([]byte, keySize) values := make([]byte, 300) comp, err := compress.NewCompressor(context.Background(), "cmp", path.Join(tmp, "100k.v2"), tmp, compress.MinPatternScore, 1, log.LvlDebug) require.NoError(t, err) for i := 0; i < args.KeyCount; i++ { - // n, err := rnd.Read(keys[:52]) - // require.EqualValues(t, n, 52) - // require.NoError(t, err) + n, err := rnd.Read(keys[:52]) + require.EqualValues(t, n, 52) + require.NoError(t, err) + err = comp.AddWord(keys[:n]) + require.NoError(t, err) - n, err := rnd.Read(values[:rnd.Intn(300)]) + n, err = rnd.Read(values[:rnd.Intn(300)]) require.NoError(t, err) err = comp.AddWord(values[:n]) @@ -536,14 +616,11 @@ func Test_InitBtreeIndex(t *testing.T) { t.Fatalf("not enough values at %d", i) break } - pos = getter.Skip() - // getter.Next(values[:0]) - - n, err := rnd.Read(keys[:args.KeySize]) - require.EqualValues(t, n, args.KeySize) - require.NoError(t, err) + keys, _ := getter.Next(keys[:0]) err = iw.AddKey(keys[:], uint64(pos)) + + pos = getter.Skip() require.NoError(t, err) } decomp.Close() @@ -551,7 +628,9 @@ func Test_InitBtreeIndex(t *testing.T) { require.NoError(t, iw.Build()) iw.Close() - bt, err := OpenBtreeIndex(args.IndexFile, path.Join(tmp, "100k.v2"), 4) + // fixme kv is shifted by 1 + // fixme index building functions + bt, err := OpenBtreeIndex(args.IndexFile, path.Join(tmp, "100k.v2"), M) require.NoError(t, err) require.EqualValues(t, bt.KeyCount(), args.KeyCount) bt.Close() @@ -583,7 +662,7 @@ func Benchmark_BtreeIndex_Allocation(b *testing.B) { count := rnd.Intn(1000000000) bt := newBtAlloc(uint64(count), uint64(1<<12)) bt.traverseDfs() - fmt.Printf("alloc %v\n", time.Now().Sub(now)) + fmt.Printf("alloc %v\n", time.Since(now)) } } @@ -594,7 +673,7 @@ func Benchmark_BtreeIndex_Search(b *testing.B) { count := rnd.Intn(max) bt := newBtAlloc(uint64(count), uint64(1<<11)) bt.traverseDfs() - fmt.Printf("alloc %v\n", time.Now().Sub(now)) 
+ fmt.Printf("alloc %v\n", time.Since(now)) for i := 0; i < b.N; i++ { bt.search(uint64(i % max)) diff --git a/state/btree_index.go b/state/btree_index.go index 47f8a1abe..833a77a46 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -157,6 +157,7 @@ type btAlloc struct { nodes [][]node data []uint64 naccess uint64 + trace bool dataLookup func(di uint64) ([]byte, []byte, error) } @@ -175,6 +176,7 @@ func newBtAlloc(k, M uint64) *btAlloc { M: M, K: k, d: d, + trace: true, } a.vx[0] = 1 a.vx[d] = k @@ -315,6 +317,7 @@ func (a *btAlloc) traverseDfs() { a.nodes[l] = make([]node, 0) } + // TODO if keys less than half leaf size store last key to just support bsearch on these amount. c := a.cursors[len(a.cursors)-1] pc := a.cursors[(len(a.cursors) - 2)] root := new(node) @@ -526,9 +529,8 @@ func (a *btAlloc) fetchByDi(i uint64) (uint64, bool) { func (a *btAlloc) bsKey(x []byte, l, r uint64) (*Cursor, error) { var exit bool - var di uint64 - for l < r { - m := (l + r) >> 1 + for l <= r { + di := (l + r) >> 1 mk, value, err := a.dataLookup(di) a.naccess++ @@ -538,14 +540,17 @@ func (a *btAlloc) bsKey(x []byte, l, r uint64) (*Cursor, error) { case err != nil: break case cmp == 0: - return newCursor(context.TODO(), mk, value, m), nil + return newCursor(context.TODO(), mk, value, di), nil case cmp == -1: if exit { break } - l = m + 1 + l = di + 1 default: - r = m + r = di + } + if l == r { + break } } return nil, fmt.Errorf("not found") @@ -608,10 +613,12 @@ func (a *btAlloc) bs(i, x, l, r uint64, direct bool) (uint64, uint64, bool) { return l, r, false } -func (a *btAlloc) bsNode(i, l, r uint64, x []byte) (*node, uint64, []byte) { +func (a *btAlloc) bsNode(i, l, r uint64, x []byte) (*node, int64, int64, []byte) { var exit bool - var di, lm uint64 - n := new(node) + var lm, rm int64 + lm = -1 + rm = -1 + var n *node for l <= r { m := (l + r) >> 1 @@ -619,145 +626,116 @@ func (a *btAlloc) bsNode(i, l, r uint64, x []byte) (*node, uint64, []byte) { m = l exit = true } - lm = m - di = a.nodes[i][m].d n = &a.nodes[i][m] + // di = n.d + // _ = di a.naccess++ - mk, value, err := a.dataLookup(di) - cmp := bytes.Compare(mk, x) + // mk, value, err := a.dataLookup(di) + cmp := bytes.Compare(n.key, x) switch { - case err != nil: - fmt.Printf("err at switch %v\n", err) - break + // case err != nil: + // fmt.Printf("err at switch %v\n", err) + // break case cmp == 0: - return n, m, value + return n, int64(m), int64(m), n.val case cmp < 0: - if m+1 == r { - return n, m, nil - } - l = m + // if m+1 == r { + // return n, m, rm, nil + // } + l = m + 1 + lm = int64(m) default: - if m == l { - return n, m, nil - } + // if m == l { + // return n, m, rm, nil + // } r = m + rm = int64(r) } if exit { break } } - return nil, lm, nil + return n, lm, rm, nil } func (a *btAlloc) seek(ik []byte) (*Cursor, error) { - var L, m uint64 + var L, minD, maxD uint64 + var lm, rm int64 R := uint64(len(a.nodes[0]) - 1) + maxD = a.K + 1 + + if a.trace { + fmt.Printf("seek key %x\n", ik) + } ln := new(node) var val []byte for l, level := range a.nodes { - ln, m, val = a.bsNode(uint64(l), L, R, ik) - if ln == nil { + ln, lm, rm, val = a.bsNode(uint64(l), L, R, ik) + if ln == nil { // should return node which is nearest to key from the left so never nil L = 0 - fmt.Printf("found nil key %d lvl=%d naccess=%d\n", level[m].d, l, a.naccess) - break + if a.trace { + fmt.Printf("found nil key %x di=%d lvl=%d naccess_ram=%d\n", level[lm].key, level[lm].d, l, a.naccess) + } + panic(fmt.Errorf("nil node at %d", l)) + } + if lm >= 0 { + minD 
= a.nodes[l][lm].d + L = level[lm].fc + } + if rm >= 0 { + maxD = a.nodes[l][rm].d + R = level[rm].fc } - switch bytes.Compare(ln.key, ik) { // k.compare(ik) { - case 1: - if ln.fc > 0 { - L = ln.fc - 1 - } else { - L = 0 - } + switch bytes.Compare(ln.key, ik) { + case 1: // key > ik + maxD = ln.d + case -1: // key < ik + minD = ln.d case 0: - fmt.Printf("found key %+v = %v naccess=%d\n", ik, val /*level[m].d,*/, a.naccess) - //return true - return newCursor(context.TODO(), ln.key, val, ln.d), nil - //break - default: - if m < uint64(len(level)) { - R = level[m+1].fc - //R = level[m].fc - } else { - R = uint64(len(a.nodes[l+1]) - 1) + if a.trace { + fmt.Printf("found key %x v=%x naccess_ram=%d\n", ik, val /*level[m].d,*/, a.naccess) } + return newCursor(context.TODO(), ln.key, val, ln.d), nil + } + if a.trace { + fmt.Printf("range={%x d=%d p=%d} (%d, %d) L=%d naccess_ram=%d\n", ln.key, ln.d, ln.p, minD, maxD, l, a.naccess) } - fmt.Printf("range={%+v} (%d, %d) L=%d naccess=%d\n", ln, L, R, l, a.naccess) } switch bytes.Compare(ik, ln.key) { case -1: - L = 0 + L = minD // =0 case 0: - fmt.Printf("last found key %d naccess=%d\n", ln.d, a.naccess) + if a.trace { + fmt.Printf("last found key %x v=%x di=%d naccess_ram=%d\n", ln.key, ln.val, ln.d, a.naccess) + } return newCursor(context.TODO(), ln.key, val, ln.d), nil case 1: - L = ln.d + L = ln.d + 1 } a.naccess = 0 // reset count before actually go to storage - cursor, err := a.bsKey(ik, L, a.nodes[a.d-1][m+1].d) + cursor, err := a.bsKey(ik, L, maxD) if err != nil { - //if trace { - fmt.Printf("key %+v not found\n", ik) - //} - //return true + if a.trace { + fmt.Printf("key %x not found\n", ik) + } + return nil, err } else { - fmt.Printf("last found key %+v naccess=%d [%v]\n", cursor.key, a.naccess, err) + if a.trace { + fmt.Printf("finally found key %x v=%x naccess_disk=%d [err=%v]\n", cursor.key, cursor.value, a.naccess, err) + } return cursor, nil } return nil, fmt.Errorf("key not found") } -// deprecated -func (a *btAlloc) lookup(ik uint64) bool { - trace, direct, found := true, false, false - mk, l, r := uint64(0), uint64(0), uint64(len(a.nodes[0])-1) - - for i := 0; i < len(a.nodes); i++ { - mk, r, found = a.bs(uint64(i), ik, l, r, direct) - if found { - if trace { - fmt.Printf("found key %d naccess=%d\n", a.nodes[i][mk].d, a.naccess) - } - return true - } - if trace { - fmt.Printf("range={%d,%d} (%d, %d) L=%d naccess=%d\n", a.nodes[i][l].d, a.nodes[i][r].d, l, r, i, a.naccess) - } - - l, r = a.nodes[i][mk].fc, a.nodes[i][r].fc - if trace && i < len(a.nodes)-1 { - fmt.Printf("next range={%d,%d} (%d, %d) L=%d naccess=%d\n", a.nodes[i+1][l].d, a.nodes[i+1][r].d, l, r, i+1, a.naccess) - } - } - - mindi, maxdi := uint64(0), a.nodes[a.d-1][l+1].d - if l > 0 { - mindi = a.nodes[a.d-1][l-1].d - } - if trace { - fmt.Printf("smallest range {%d-%d} (%d-%d)\n", mindi, maxdi, l-1, l+1) - } - - // search in smallest found interval - direct = true - mk, _, found = a.bs(a.d-1, ik, mindi, maxdi, direct) - if found { - if trace { - fmt.Printf("last found key %d naccess=%d\n", mk, a.naccess) - } - return true - } - - return false -} - // deprecated func (a *btAlloc) search(ik uint64) bool { l, r := uint64(0), uint64(len(a.nodes[0])) @@ -888,7 +866,7 @@ func (r *BtIndexReader) Seek(x []byte) (*Cursor, error) { if r.index != nil { cursor, err := r.index.alloc.seek(x) if err != nil { - return nil, err + return nil, fmt.Errorf("seek key %x: %w", x, err) } cursor.ix = r.index return cursor, nil @@ -916,7 +894,6 @@ type BtIndexWriter struct { tmpDir string numBuf 
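// Condensed restatement of the lookup above (wording mine, not from the diff):
// seek() first walks the in-memory levels, binary-searching each level's node
// keys with bsNode and narrowing both the child window [L, R] and the data
// window [minD, maxD]; it then resets naccess and hands the remaining window to
// bsKey, which binary-searches the actual entries via dataLookup. The trace
// output therefore reports RAM probes and disk probes separately:
//
//	cursor, err := a.bsKey(ik, L, maxD) // phase 2: one dataLookup per probe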
[8]byte keyCount uint64 - keySize int etlBufLimit datasize.ByteSize bytesPerRec int } @@ -925,7 +902,6 @@ type BtIndexWriterArgs struct { IndexFile string // File name where the index and the minimal perfect hash function will be written to TmpDir string KeyCount int - KeySize int EtlBufLimit datasize.ByteSize } @@ -947,7 +923,6 @@ func NewBtIndexWriter(args BtIndexWriterArgs) (*BtIndexWriter, error) { btw.etlBufLimit = etl.BufferOptimalSize } - btw.keySize = args.KeySize btw.bucketCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) btw.bucketCollector.LogLvl(log.LvlDebug) //btw.offsetCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) @@ -966,9 +941,9 @@ func (btw *BtIndexWriter) loadFuncBucket(k, v []byte, _ etl.CurrentTableReader, // } //} - if _, err := btw.indexW.Write(k); err != nil { - return err - } + // if _, err := btw.indexW.Write(k); err != nil { + // return err + // } if _, err := btw.indexW.Write(v[8-btw.bytesPerRec:]); err != nil { return err } @@ -1066,10 +1041,6 @@ func (btw *BtIndexWriter) Build() error { if err = btw.indexW.WriteByte(byte(btw.bytesPerRec)); err != nil { return fmt.Errorf("write bytes per record: %w", err) } - binary.BigEndian.PutUint16(btw.numBuf[:2], uint16(btw.keySize)) - if _, err = btw.indexW.Write(btw.numBuf[:2]); err != nil { - return fmt.Errorf("write number of keys: %w", err) - } defer btw.bucketCollector.Close() log.Log(btw.lvl, "[index] calculating", "file", btw.indexFileName) @@ -1116,9 +1087,6 @@ func (btw *BtIndexWriter) AddKey(key []byte, offset uint64) error { if btw.built { return fmt.Errorf("cannot add keys after perfect hash function had been built") } - if len(key) != btw.keySize { - return fmt.Errorf("invalid key size %d while expected %d", len(key), btw.keySize) - } binary.BigEndian.PutUint64(btw.numBuf[:], offset) if offset > btw.maxOffset { @@ -1149,15 +1117,55 @@ type BtIndex struct { modTime time.Time filePath string keyCount uint64 - keySize int baseDataID uint64 bytesPerRec int dataoffset uint64 + auxBuf []byte decompressor *compress.Decompressor getter *compress.Getter } +func BuildBtreeIndex(dataPath, indexPath string) error { + decomp, err := compress.NewDecompressor(dataPath) + if err != nil { + return err + } + + args := BtIndexWriterArgs{ + IndexFile: indexPath, + TmpDir: filepath.Dir(indexPath), + } + + iw, err := NewBtIndexWriter(args) + if err != nil { + return err + } + + getter := decomp.MakeGetter() + getter.Reset(0) + + key := make([]byte, 0, 64) + + var pos uint64 + for getter.HasNext() { + key, _ := getter.Next(key[:0]) + err = iw.AddKey(key[:], uint64(pos)) + if err != nil { + return err + } + + pos = getter.Skip() + } + decomp.Close() + + if err := iw.Build(); err != nil { + return err + } + iw.Close() + return nil +} + func OpenBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { s, err := os.Stat(indexPath) if err != nil { @@ -1168,6 +1176,7 @@ func OpenBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { filePath: indexPath, size: s.Size(), modTime: s.ModTime(), + auxBuf: make([]byte, 64), } idx.file, err = os.Open(indexPath) @@ -1179,6 +1188,7 @@ func OpenBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { return nil, err } idx.data = idx.mmapUnix[:idx.size] + // Read number of keys and bytes per record pos := 8 idx.keyCount = binary.BigEndian.Uint64(idx.data[:pos]) @@ -1186,13 +1196,10 @@ func OpenBtreeIndex(indexPath, dataPath string, M uint64) 
(*BtIndex, error) { idx.bytesPerRec = int(idx.data[pos]) pos += 1 - idx.keySize = int(binary.BigEndian.Uint16(idx.data[pos:])) - pos += 2 - - offset := int(idx.keyCount)*idx.bytesPerRec + (idx.keySize * int(idx.keyCount)) - if offset < 0 { - return nil, fmt.Errorf("offset is: %d which is below zero, the file: %s is broken", offset, indexPath) - } + // offset := int(idx.keyCount) * idx.bytesPerRec //+ (idx.keySize * int(idx.keyCount)) + // if offset < 0 { + // return nil, fmt.Errorf("offset is: %d which is below zero, the file: %s is broken", offset, indexPath) + // } //p := (*[]byte)(unsafe.Pointer(&idx.data[pos])) //l := int(idx.keyCount)*idx.bytesPerRec + (16 * int(idx.keyCount)) @@ -1217,25 +1224,27 @@ func (b *BtIndex) dataLookup(di uint64) ([]byte, []byte, error) { return nil, nil, fmt.Errorf("ki is greater than key count in index") } - p := b.dataoffset + di*uint64(b.bytesPerRec) + uint64(b.keySize)*di - if uint64(len(b.data)) < p+uint64(b.keySize)+uint64(b.bytesPerRec) { - return nil, nil, fmt.Errorf("data lookup gone too far (%d after %d)", p+16+uint64(b.bytesPerRec)-uint64(len(b.data)), len(b.data)) + p := b.dataoffset + di*uint64(b.bytesPerRec) + if uint64(len(b.data)) < p+uint64(b.bytesPerRec) { + return nil, nil, fmt.Errorf("data lookup gone too far (%d after %d)", p+uint64(b.bytesPerRec)-uint64(len(b.data)), len(b.data)) } - key := b.data[p : p+uint64(b.keySize)] - p += uint64(b.keySize) offt := b.data[p : p+uint64(b.bytesPerRec)] - aux := make([]byte, 8) + var aux [8]byte copy(aux[8-len(offt):], offt) - vo := binary.BigEndian.Uint64(aux) - - b.getter.Reset(vo) - var val []byte - if b.getter.HasNext() { - val, _ = b.getter.Next(nil) + offset := binary.BigEndian.Uint64(aux[:]) + b.getter.Reset(offset) + if !b.getter.HasNext() { + return nil, nil, fmt.Errorf("pair %d not found", di) } + key, _ := b.getter.Next(nil) + + if !b.getter.HasNext() { + return nil, nil, fmt.Errorf("pair %d not found", di) + } + val, _ := b.getter.Next(nil) return key, val, nil } From 0ff768467c4c6cc9defa3d352b4c87893ad66353 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 6 Feb 2023 19:13:06 +0000 Subject: [PATCH 06/54] another fixup on search --- state/aggregator_test.go | 2 +- state/btree_index.go | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index cb6b6a506..dcc589a22 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -453,7 +453,7 @@ func Test_BtreeIndex_Seek(t *testing.T) { keys, err := pivotKeysFromKV(dataPath) require.NoError(t, err) - for i := 818; i < len(keys); i++ { + for i := 821; i < len(keys); i++ { cur, err := idx.Seek(keys[i]) require.NoErrorf(t, err, "i=%d", i) require.EqualValues(t, keys[i], cur.key) diff --git a/state/btree_index.go b/state/btree_index.go index 833a77a46..29838f9e1 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -661,6 +661,16 @@ func (a *btAlloc) bsNode(i, l, r uint64, x []byte) (*node, int64, int64, []byte) return n, lm, rm, nil } +// find position of key with node.di <= d at level lvl +func (a *btAlloc) seekLeast(lvl, d uint64) int { + for i, node := range a.nodes[lvl] { + if node.d >= d { + return i + } + } + return len(a.nodes[lvl]) +} + func (a *btAlloc) seek(ik []byte) (*Cursor, error) { var L, minD, maxD uint64 var lm, rm int64 @@ -685,10 +695,24 @@ func (a *btAlloc) seek(ik []byte) (*Cursor, error) { if lm >= 0 { minD = a.nodes[l][lm].d L = level[lm].fc + } else { + if l+1 != len(a.nodes) { + L = 
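// Summary of the on-disk layout that dataLookup above decodes, with a small
// worked example (not from the diff): the header is 8 bytes of keyCount plus
// one byte of bytesPerRec, followed by keyCount fixed-width offsets into the
// companion .kv file; keys themselves are no longer stored in the .bt file.
// bytesPerRec is presumably sized from the largest offset tracked in AddKey.
//
//	// e.g. 1000 keys whose largest offset is 5_000_000 (< 2^24):
//	// bytesPerRec = 3, so the index is roughly 8 + 1 + 1000*3 = 3009 bytes.
//	// Entry di is read as: take bytesPerRec bytes at dataoffset + di*bytesPerRec,
//	// left-pad to 8 bytes, decode big-endian, getter.Reset(offset), then read
//	// the key word and the value word from the compressed file.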
uint64(a.seekLeast(uint64(l+1), minD)) + if L == uint64(len(a.nodes[l+1])) { + L-- + } + } } if rm >= 0 { maxD = a.nodes[l][rm].d R = level[rm].fc + } else { + if l+1 != len(a.nodes) { + R = uint64(a.seekLeast(uint64(l+1), maxD)) + if R == uint64(len(a.nodes[l+1])) { + R-- + } + } } switch bytes.Compare(ln.key, ik) { From e503a617dad27c145234bf4a6e89610d1f52a2dd Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 8 Feb 2023 18:05:54 +0000 Subject: [PATCH 07/54] test updates --- state/aggregator_bench_test.go | 55 ++++ state/aggregator_test.go | 109 ++++--- state/btree_index.go | 531 +++++++-------------------------- 3 files changed, 240 insertions(+), 455 deletions(-) diff --git a/state/aggregator_bench_test.go b/state/aggregator_bench_test.go index 19374692f..d5a985fc3 100644 --- a/state/aggregator_bench_test.go +++ b/state/aggregator_bench_test.go @@ -2,9 +2,13 @@ package state import ( "context" + "fmt" "math/rand" "os" + "path" + "path/filepath" "testing" + "time" "github.com/ledgerwatch/log/v3" "github.com/stretchr/testify/require" @@ -85,3 +89,54 @@ func queueKeys(ctx context.Context, seed, ofSize uint64) <-chan []byte { }() return keys } + +func Benchmark_BtreeIndex_Allocation(b *testing.B) { + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + for i := 0; i < b.N; i++ { + now := time.Now() + count := rnd.Intn(1000000000) + bt := newBtAlloc(uint64(count), uint64(1<<12), true) + bt.traverseDfs() + fmt.Printf("alloc %v\n", time.Since(now)) + } +} +func Benchmark_BtreeIndex_Search(b *testing.B) { + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + // max := 100000000 + // count := rnd.Intn(max) + // bt := newBtAlloc(uint64(count), uint64(1<<11)) + // bt.traverseDfs() + // fmt.Printf("alloc %v\n", time.Since(now)) + + tmp := b.TempDir() + + // dataPath := generateCompressedKV(b, tmp, 52, 10, keyCount) + defer os.RemoveAll(tmp) + dir, _ := os.Getwd() + fmt.Printf("path %s\n", dir) + dataPath := "../../data/storage.256-288.kv" + + indexPath := path.Join(tmp, filepath.Base(dataPath)+".bti") + err := BuildBtreeIndex(dataPath, indexPath) + require.NoError(b, err) + + M := 1024 + bt, err := OpenBtreeIndex(indexPath, dataPath, uint64(M)) + + require.NoError(b, err) + + idx := NewBtIndexReader(bt) + + keys, err := pivotKeysFromKV(dataPath) + require.NoError(b, err) + + for i := 0; i < b.N; i++ { + p := rnd.Intn(len(keys)) + cur, err := idx.Seek(keys[p]) + require.NoErrorf(b, err, "i=%d", i) + require.EqualValues(b, keys[p], cur.key) + require.NotEmptyf(b, cur.Value(), "i=%d", i) + } + + bt.Close() +} diff --git a/state/aggregator_test.go b/state/aggregator_test.go index dcc589a22..88a10b98e 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -1,6 +1,7 @@ package state import ( + "bytes" "context" "encoding/binary" "fmt" @@ -437,7 +438,7 @@ func Test_BtreeIndex_Seek(t *testing.T) { tmp := t.TempDir() keyCount, M := 1000, 16 - dataPath := generateCompressedKV(t, tmp, 52, keyCount) + dataPath := generateCompressedKV(t, tmp, 52, 180 /*val size*/, keyCount) defer os.RemoveAll(tmp) indexPath := path.Join(tmp, filepath.Base(dataPath)+".bti") @@ -453,7 +454,7 @@ func Test_BtreeIndex_Seek(t *testing.T) { keys, err := pivotKeysFromKV(dataPath) require.NoError(t, err) - for i := 821; i < len(keys); i++ { + for i := 0; i < len(keys); i++ { cur, err := idx.Seek(keys[i]) require.NoErrorf(t, err, "i=%d", i) require.EqualValues(t, keys[i], cur.key) @@ -461,18 +462,6 @@ func Test_BtreeIndex_Seek(t *testing.T) { // require.EqualValues(t, uint64(i), cur.Value()) } - // for i := 0; i 
< len(lookafter); i += 5 { - // cur, err := idx.Seek(lookafter[i]) - // require.NoError(t, err) - // //require.EqualValues(t, lookafter[i], cur.key) - // require.EqualValues(t, uint64(i), cur.Value()) - // for j := 0; j < 5; j++ { - // //require.EqualValues(t, lookafter[i+j], idx.Key()) - // require.EqualValues(t, uint64(i+j), cur.Value()) - // cur.Next() - // } - // } - bt.Close() } @@ -499,7 +488,7 @@ func pivotKeysFromKV(dataPath string) ([][]byte, error) { return listing, nil } -func generateCompressedKV(t *testing.T, tmp string, keySize, keyCount int) string { +func generateCompressedKV(t testing.TB, tmp string, keySize, valueSize, keyCount int) string { args := BtIndexWriterArgs{ IndexFile: path.Join(tmp, "100k.bt"), TmpDir: tmp, @@ -511,7 +500,7 @@ func generateCompressedKV(t *testing.T, tmp string, keySize, keyCount int) strin defer iw.Close() rnd := rand.New(rand.NewSource(0)) - values := make([]byte, 300) + values := make([]byte, valueSize) comp, err := compress.NewCompressor(context.Background(), "cmp", path.Join(tmp, "100k.v2"), tmp, compress.MinPatternScore, 1, log.LvlDebug) require.NoError(t, err) @@ -525,7 +514,7 @@ func generateCompressedKV(t *testing.T, tmp string, keySize, keyCount int) strin err = comp.AddWord(key[:]) require.NoError(t, err) - n, err := rnd.Read(values[:rnd.Intn(300)+1]) + n, err := rnd.Read(values[:rnd.Intn(valueSize)+1]) require.NoError(t, err) err = comp.AddWord(values[:n]) @@ -643,7 +632,7 @@ func Test_BtreeIndex_Allocation(t *testing.T) { t.Run(fmt.Sprintf("%d", m< 0 { + t.Fatalf("prev %s cur %s, next key should be greater", prevKey, nk) + } + prevKey = nk + } + if i%1000 == 0 { + fmt.Printf("%d searches, last took %v total seek time %v avg=%v next_access_last[%d] %v\n", i, took, tsum, tsum/time.Duration(i), j, ntimer/time.Duration(j)) + } -func Benchmark_BtreeIndex_Search(b *testing.B) { - rnd := rand.New(rand.NewSource(time.Now().UnixNano())) - now := time.Now() - max := 100000000 - count := rnd.Intn(max) - bt := newBtAlloc(uint64(count), uint64(1<<11)) - bt.traverseDfs() - fmt.Printf("alloc %v\n", time.Since(now)) - - for i := 0; i < b.N; i++ { - bt.search(uint64(i % max)) } + avg := tsum / (1000000 - 1) + fmt.Printf("avg seek time %v\n", avg) + + bt.Close() } diff --git a/state/btree_index.go b/state/btree_index.go index 29838f9e1..8f1247d01 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -55,72 +55,22 @@ type node struct { val []byte } -type key struct { - bucket, fprint uint64 -} - -func bytesToKey(b []byte) key { - if len(b) > 16 { - panic(fmt.Errorf("invalid size of key bytes to convert (size %d)", len(b))) - } - return key{ - bucket: binary.BigEndian.Uint64(b), - fprint: binary.BigEndian.Uint64(b[8:]), - } -} - -func (k key) compare(k2 key) int { - if k.bucket < k2.bucket { - return -1 - } - if k.bucket > k2.bucket { - return 1 - } - if k.fprint < k2.fprint { - return -1 - } - if k.fprint > k2.fprint { - return 1 - } - return 0 -} - -func (k key) Bytes() []byte { - buf := make([]byte, 16) - binary.BigEndian.PutUint64(buf[:8], k.bucket) - binary.BigEndian.PutUint64(buf[8:], k.fprint) - return buf -} - -// deprecated -func binsearch(a []node, x uint64) uint64 { - l, r := uint64(0), uint64(len(a)) - for l < r { - mid := (l + r) / 2 - if a[mid].d < x { - l = mid + 1 - } else { - r = mid - } - } - return l -} - type Cursor struct { ctx context.Context - ix *BtIndex + ix *btAlloc key []byte value []byte d uint64 } -func newCursor(ctx context.Context, k, v []byte, d uint64) *Cursor { +func (a *btAlloc) newCursor(ctx 
context.Context, k, v []byte, d uint64) *Cursor { return &Cursor{ ctx: ctx, - key: k, - value: v, + key: common.Copy(k), + value: common.Copy(v), d: d, + ix: a, } } @@ -133,7 +83,7 @@ func (c *Cursor) Value() []byte { } func (c *Cursor) Next() bool { - if c.d+1 >= c.ix.KeyCount() { + if c.d > c.ix.K-1 { return false } k, v, err := c.ix.dataLookup(c.d + 1) @@ -162,11 +112,8 @@ type btAlloc struct { dataLookup func(di uint64) ([]byte, []byte, error) } -func newBtAlloc(k, M uint64) *btAlloc { +func newBtAlloc(k, M uint64, trace bool) *btAlloc { d := logBase(k, M) - m := max64(2, M>>1) - - fmt.Printf("k=%d d=%d, M=%d m=%d\n", k, d, M, m) a := &btAlloc{ vx: make([]uint64, d+1), sons: make([][]uint64, d+1), @@ -176,10 +123,12 @@ func newBtAlloc(k, M uint64) *btAlloc { M: M, K: k, d: d, - trace: true, + trace: trace, } - a.vx[0] = 1 - a.vx[d] = k + if trace { + fmt.Printf("k=%d d=%d, M=%d\n", k, d, M) + } + a.vx[0], a.vx[d] = 1, k nnc := func(vx uint64) uint64 { return uint64(math.Ceil(float64(vx) / float64(M))) @@ -191,7 +140,6 @@ func newBtAlloc(k, M uint64) *btAlloc { //nnc := a.vx[i+1] / M //nvc := a.vx[i+1] / m //bvc := a.vx[i+1] / (m + (m >> 1)) - //_, _ = nvc, nnc a.vx[i] = min64(uint64(math.Pow(float64(M), float64(i))), nnc) } @@ -199,17 +147,6 @@ func newBtAlloc(k, M uint64) *btAlloc { pnv := uint64(0) for l := a.d - 1; l > 0; l-- { s := nnc(a.vx[l+1]) - //left := a.vx[l+1] % M - //if left > 0 { - // if left < m { - // s-- - // newPrev := M - (m - left) - // dp := M - newPrev - // a.sons[l] = append(a.sons[l], 1, newPrev, 1, left+dp) - // } else { - // a.sons[l] = append(a.sons[l], 1, left) - // } - //} a.sons[l] = append(a.sons[l], s, M) for ik := 0; ik < len(a.sons[l]); ik += 2 { ncount += a.sons[l][ik] * a.sons[l][ik+1] @@ -221,10 +158,12 @@ func newBtAlloc(k, M uint64) *btAlloc { a.sons[0] = []uint64{1, pnv} ncount += a.sons[0][0] * a.sons[0][1] // last one a.N = ncount - fmt.Printf("ncount=%d ∂%.5f\n", ncount, float64(a.N-uint64(k))/float64(a.N)) - for i, v := range a.sons { - fmt.Printf("L%d=%v\n", i, v) + if trace { + fmt.Printf("ncount=%d ∂%.5f\n", ncount, float64(a.N-uint64(k))/float64(a.N)) + for i, v := range a.sons { + fmt.Printf("L%d=%v\n", i, v) + } } return a @@ -331,7 +270,9 @@ func (a *btAlloc) traverseDfs() { // -- no bros -> shift cursor (tricky) if di > a.K { a.N = di - 1 // actually filled node count - fmt.Printf("ncount=%d ∂%.5f\n", a.N, float64(a.N-a.K)/float64(a.N)) + if a.trace { + fmt.Printf("ncount=%d ∂%.5f\n", a.N, float64(a.N-a.K)/float64(a.N)) + } break } @@ -430,105 +371,7 @@ func (a *btAlloc) traverseDfs() { } } -// deprecated -func (a *btAlloc) traverse() { - var sum uint64 - for l := 0; l < len(a.sons)-1; l++ { - if len(a.sons[l]) < 2 { - panic("invalid btree allocation markup") - } - a.cursors[l] = markupCursor{uint64(l), 1, 0, 0} - - for i := 0; i < len(a.sons[l]); i += 2 { - sum += a.sons[l][i] * a.sons[l][i+1] - } - a.nodes[l] = make([]node, 0) - } - fmt.Printf("nodes total %d\n", sum) - - c := a.cursors[len(a.cursors)-1] - - var di uint64 - for stop := false; !stop; { - bros := a.sons[c.l][c.p] - parents := a.sons[c.l][c.p-1] - - // fill leaves, mark parent if needed (until all grandparents not marked up until root) - // check if eldest parent has brothers - // -- has bros -> fill their leaves from the bottom - // -- no bros -> shift cursor (tricky) - - for i := uint64(0); i < bros; i++ { - c.di = di - fmt.Printf("L%d |%d| d %2d s %2d\n", c.l, c.p, c.di, c.si) - c.si++ - di++ - } - - pid := c.si / bros - if pid >= parents { - if c.p+2 >= 
uint64(len(a.sons[c.l])) { - stop = true // end of row - fmt.Printf("F%d |%d| d %2d\n", c.l, c.p, c.di) - } else { - //fmt.Printf("N %d d%d s%d\n", c.l, c.di, c.si) - //a.nodes[c.l] = append(a.nodes[c.l], node{p: c.p, d: c.di, s: c.si}) - c.p += 2 - c.si = 0 - c.di = 0 - } - } - a.cursors[c.l] = c - - for l := len(a.cursors) - 2; l >= 0; l-- { - pc := a.cursors[l] - uncles := a.sons[pc.l][pc.p] - grands := a.sons[pc.l][pc.p-1] - - pi1 := pc.si / uncles - pc.si++ - pi2 := pc.si / uncles - moved := pi2-pi1 != 0 - pc.di = di - fmt.Printf("P%d |%d| d %2d s %2d pid %d\n", pc.l, pc.p, pc.di, pc.si-1, pid) - a.nodes[pc.l] = append(a.nodes[pc.l], node{p: pc.p, d: pc.di, s: pc.si}) - - di++ - - if pi2 >= grands { // skip one step of si due to different parental filling order - if pc.p+2 >= uint64(len(a.sons[pc.l])) { - // end of row - fmt.Printf("E%d |%d| d %2d\n", pc.l, pc.p, pc.di) - break - } - //fmt.Printf("N %d d%d s%d\n", pc.l, pc.di, pc.si) - //fmt.Printf("P%d |%d| d %2d s %2d pid %d\n", pc.l, pc.p, pc.di, pc.si, pid) - pc.p += 2 - pc.si = 0 - pc.di = 0 - } - a.cursors[pc.l] = pc - - if l >= 1 && a.cursors[l-1].di == 0 { - continue - } - if !moved { - break - } - } - } -} - -// deprecated -func (a *btAlloc) fetchByDi(i uint64) (uint64, bool) { - if int(i) >= len(a.data) { - return 0, true - } - return a.data[i], false -} - func (a *btAlloc) bsKey(x []byte, l, r uint64) (*Cursor, error) { - var exit bool for l <= r { di := (l + r) >> 1 @@ -538,13 +381,10 @@ func (a *btAlloc) bsKey(x []byte, l, r uint64) (*Cursor, error) { cmp := bytes.Compare(mk, x) switch { case err != nil: - break + return nil, err case cmp == 0: - return newCursor(context.TODO(), mk, value, di), nil + return a.newCursor(context.TODO(), mk, value, di), nil case cmp == -1: - if exit { - break - } l = di + 1 default: r = di @@ -553,167 +393,66 @@ func (a *btAlloc) bsKey(x []byte, l, r uint64) (*Cursor, error) { break } } - return nil, fmt.Errorf("not found") + return nil, fmt.Errorf("key %x was not found", x) } -// deprecated -func (a *btAlloc) bs(i, x, l, r uint64, direct bool) (uint64, uint64, bool) { - var exit bool - var di uint64 - for l <= r { - m := (l + r) >> 1 - if l == r { - m = l - exit = true - } +func (a *btAlloc) bsNode(i, l, r uint64, x []byte) (n node, lm int64, rm int64) { + n, lm, rm = node{}, -1, -1 - switch direct { - case true: - if m >= uint64(len(a.data)) { - di = a.data[a.K-1] - exit = true - } else { - di = a.data[m] - } - case false: - di = a.nodes[i][m].d - } - - mkey, nf := a.fetchByDi(di) - a.naccess++ - switch { - case nf: - break - case mkey == x: - return m, r, true - case mkey < x: - if exit { - break - } - if m+1 == r { - if m > 0 { - m-- - } - return m, r, false - } - l = m + 1 - default: - if exit { - break - } - if m-l == 1 && l > 0 { - return l - 1, r, false - } - r = m - } - if exit { - break - } - } - return l, r, false -} - -func (a *btAlloc) bsNode(i, l, r uint64, x []byte) (*node, int64, int64, []byte) { - var exit bool - var lm, rm int64 - lm = -1 - rm = -1 - var n *node - - for l <= r { + for l < r { m := (l + r) >> 1 - if l == r { - m = l - exit = true - } - - n = &a.nodes[i][m] - // di = n.d - // _ = di + n = a.nodes[i][m] a.naccess++ - // mk, value, err := a.dataLookup(di) cmp := bytes.Compare(n.key, x) switch { - // case err != nil: - // fmt.Printf("err at switch %v\n", err) - // break case cmp == 0: - return n, int64(m), int64(m), n.val + return n, int64(m), int64(m) + case cmp > 0: + r = m + rm = int64(m) case cmp < 0: - // if m+1 == r { - // return n, m, rm, nil - // } - l = 
m + 1 lm = int64(m) + l = m + 1 default: - // if m == l { - // return n, m, rm, nil - // } - r = m - rm = int64(r) - } - if exit { - break + panic(fmt.Errorf("compare error %d, %x ? %x", cmp, n.key, x)) } } - return n, lm, rm, nil + return n, lm, rm } // find position of key with node.di <= d at level lvl -func (a *btAlloc) seekLeast(lvl, d uint64) int { +func (a *btAlloc) seekLeast(lvl, d uint64) uint64 { for i, node := range a.nodes[lvl] { if node.d >= d { - return i + return uint64(i) } } - return len(a.nodes[lvl]) + return uint64(len(a.nodes[lvl])) } -func (a *btAlloc) seek(ik []byte) (*Cursor, error) { - var L, minD, maxD uint64 - var lm, rm int64 - R := uint64(len(a.nodes[0]) - 1) - maxD = a.K + 1 - +func (a *btAlloc) Seek(ik []byte) (*Cursor, error) { if a.trace { fmt.Printf("seek key %x\n", ik) } - ln := new(node) - var val []byte + var ( + lm, rm int64 + L, R = uint64(0), uint64(len(a.nodes[0]) - 1) + minD, maxD = uint64(0), uint64(a.K) + ln node + ) + for l, level := range a.nodes { - ln, lm, rm, val = a.bsNode(uint64(l), L, R, ik) - if ln == nil { // should return node which is nearest to key from the left so never nil + ln, lm, rm = a.bsNode(uint64(l), L, R, ik) + if ln.key == nil || ln.val == nil { // should return node which is nearest to key from the left so never nil L = 0 if a.trace { - fmt.Printf("found nil key %x di=%d lvl=%d naccess_ram=%d\n", level[lm].key, level[lm].d, l, a.naccess) + fmt.Printf("found nil key %x pos_range[%d-%d] naccess_ram=%d\n", l, lm, rm, a.naccess) } panic(fmt.Errorf("nil node at %d", l)) } - if lm >= 0 { - minD = a.nodes[l][lm].d - L = level[lm].fc - } else { - if l+1 != len(a.nodes) { - L = uint64(a.seekLeast(uint64(l+1), minD)) - if L == uint64(len(a.nodes[l+1])) { - L-- - } - } - } - if rm >= 0 { - maxD = a.nodes[l][rm].d - R = level[rm].fc - } else { - if l+1 != len(a.nodes) { - R = uint64(a.seekLeast(uint64(l+1), maxD)) - if R == uint64(len(a.nodes[l+1])) { - R-- - } - } - } switch bytes.Compare(ln.key, ik) { case 1: // key > ik @@ -722,125 +461,62 @@ func (a *btAlloc) seek(ik []byte) (*Cursor, error) { minD = ln.d case 0: if a.trace { - fmt.Printf("found key %x v=%x naccess_ram=%d\n", ik, val /*level[m].d,*/, a.naccess) + fmt.Printf("found key %x v=%x naccess_ram=%d\n", ik, ln.val /*level[m].d,*/, a.naccess) } - return newCursor(context.TODO(), ln.key, val, ln.d), nil + return a.newCursor(context.TODO(), common.Copy(ln.key), common.Copy(ln.val), ln.d), nil } - if a.trace { - fmt.Printf("range={%x d=%d p=%d} (%d, %d) L=%d naccess_ram=%d\n", ln.key, ln.d, ln.p, minD, maxD, l, a.naccess) + + if rm-lm == 1 { + break + } + if lm >= 0 { + minD = a.nodes[l][lm].d + L = level[lm].fc + } else if l+1 != len(a.nodes) { + L = a.seekLeast(uint64(l+1), minD) + if L == uint64(len(a.nodes[l+1])) { + L-- + } + } + if rm >= 0 { + maxD = a.nodes[l][rm].d + R = level[rm].fc + } else if l+1 != len(a.nodes) { + R = a.seekLeast(uint64(l+1), maxD) + if R == uint64(len(a.nodes[l+1])) { + R-- + } } - } - switch bytes.Compare(ik, ln.key) { - case -1: - L = minD // =0 - case 0: if a.trace { - fmt.Printf("last found key %x v=%x di=%d naccess_ram=%d\n", ln.key, ln.val, ln.d, a.naccess) + fmt.Printf("range={%x d=%d p=%d} (%d, %d) L=%d naccess_ram=%d\n", ln.key, ln.d, ln.p, minD, maxD, l, a.naccess) } - return newCursor(context.TODO(), ln.key, val, ln.d), nil - case 1: - L = ln.d + 1 } - a.naccess = 0 // reset count before actually go to storage - cursor, err := a.bsKey(ik, L, maxD) + a.naccess = 0 // reset count before actually go to disk + cursor, err := a.bsKey(ik, 
minD, maxD) if err != nil { if a.trace { fmt.Printf("key %x not found\n", ik) } return nil, err - } else { - if a.trace { - fmt.Printf("finally found key %x v=%x naccess_disk=%d [err=%v]\n", cursor.key, cursor.value, a.naccess, err) - } - return cursor, nil - } - - return nil, fmt.Errorf("key not found") -} - -// deprecated -func (a *btAlloc) search(ik uint64) bool { - l, r := uint64(0), uint64(len(a.nodes[0])) - lr, hr := uint64(0), a.N - var naccess int64 - var trace bool - for i := 0; i < len(a.nodes); i++ { - for l < r { - m := (l + r) >> 1 - mkey, nf := a.fetchByDi(a.nodes[i][m].d) - naccess++ - if nf { - break - } - if mkey < ik { - lr = mkey - l = m + 1 - } else if mkey == ik { - if trace { - fmt.Printf("found key %d @%d naccess=%d\n", mkey, m, naccess) - } - return true //mkey - } else { - r = m - hr = mkey - } - } - if trace { - fmt.Printf("range={%d,%d} L=%d naccess=%d\n", lr, hr, i, naccess) - } - if i == len(a.nodes) { - if trace { - fmt.Printf("%d|%d - %d|%d\n", l, a.nodes[i][l].d, r, a.nodes[i][r].d) - } - return true - } - if i+1 >= len(a.nodes) { - break - } - l = binsearch(a.nodes[i+1], lr) - r = binsearch(a.nodes[i+1], hr) } - if trace { - fmt.Printf("smallest range %d-%d (%d-%d)\n", lr, hr, l, r) - } - if l == r && l > 0 { - l-- - } - - lr, hr = a.nodes[a.d-1][l].d, a.nodes[a.d-1][r].d - // search in smallest found interval - for lr < hr { - m := (lr + hr) >> 1 - mkey, nf := a.fetchByDi(m) - naccess++ - if nf { - break - } - if mkey < ik { - //lr = mkey - lr = m + 1 - } else if mkey == ik { - if trace { - fmt.Printf("last found key %d @%d naccess=%d\n", mkey, m, naccess) - } - return true //mkey - } else { - hr = m - //hr = mkey - } + if a.trace { + fmt.Printf("finally found key %x v=%x naccess_disk=%d\n", cursor.key, cursor.value, a.naccess) } - - return false + return cursor, nil } -func (a *btAlloc) printSearchMx() { +func (a *btAlloc) fillSearchMx() { for i, n := range a.nodes { - fmt.Printf("D%d |%d| ", i, len(n)) + if a.trace { + fmt.Printf("D%d |%d| ", i, len(n)) + } for j, s := range n { - fmt.Printf("%d ", s.d) + if a.trace { + fmt.Printf("%d ", s.d) + } if s.d >= a.K { break } @@ -856,7 +532,7 @@ func (a *btAlloc) printSearchMx() { } } -// BtIndexReader encapsulates Hash128 to allow concurrent access to Index +// deprecated type BtIndexReader struct { index *BtIndex } @@ -888,11 +564,10 @@ func (r *BtIndexReader) Lookup2(key1, key2 []byte) uint64 { func (r *BtIndexReader) Seek(x []byte) (*Cursor, error) { if r.index != nil { - cursor, err := r.index.alloc.seek(x) + cursor, err := r.index.alloc.Seek(x) if err != nil { return nil, fmt.Errorf("seek key %x: %w", x, err) } - cursor.ix = r.index return cursor, nil } return nil, fmt.Errorf("seek has been failed") @@ -1150,6 +825,7 @@ type BtIndex struct { getter *compress.Getter } +// Opens .kv at dataPath and generates index over it to file 'indexPath' func BuildBtreeIndex(dataPath, indexPath string) error { decomp, err := compress.NewDecompressor(dataPath) if err != nil { @@ -1172,16 +848,21 @@ func BuildBtreeIndex(dataPath, indexPath string) error { key := make([]byte, 0, 64) var pos uint64 + emptys := 0 for getter.HasNext() { - key, _ := getter.Next(key[:0]) + key, kp := getter.Next(key[:0]) err = iw.AddKey(key[:], uint64(pos)) if err != nil { return err } pos = getter.Skip() + if pos-kp == 1 { + emptys++ + } } decomp.Close() + fmt.Printf("emptys %d\n", emptys) if err := iw.Build(); err != nil { return err @@ -1235,11 +916,11 @@ func OpenBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { } 
idx.getter = idx.decompressor.MakeGetter() - idx.alloc = newBtAlloc(idx.keyCount, M) + idx.alloc = newBtAlloc(idx.keyCount, M, false) idx.alloc.dataLookup = idx.dataLookup idx.dataoffset = uint64(pos) idx.alloc.traverseDfs() - idx.alloc.printSearchMx() + idx.alloc.fillSearchMx() return idx, nil } @@ -1263,12 +944,13 @@ func (b *BtIndex) dataLookup(di uint64) ([]byte, []byte, error) { return nil, nil, fmt.Errorf("pair %d not found", di) } - key, _ := b.getter.Next(nil) + key, kp := b.getter.Next(nil) if !b.getter.HasNext() { return nil, nil, fmt.Errorf("pair %d not found", di) } - val, _ := b.getter.Next(nil) + val, vp := b.getter.Next(nil) + _, _ = kp, vp return key, val, nil } @@ -1297,11 +979,26 @@ func (b *BtIndex) Close() error { if err := b.file.Close(); err != nil { return err } + if err := b.decompressor.Close(); err != nil { + return err + } return nil } +func (b *BtIndex) Seek(x []byte) (*Cursor, error) { + if b.alloc != nil { + cursor, err := b.alloc.Seek(x) + if err != nil { + return nil, fmt.Errorf("seek key %x: %w", x, err) + } + return cursor, nil + } + return nil, fmt.Errorf("seek has been failed") +} + +// deprecated func (b *BtIndex) Lookup(key []byte) uint64 { - cursor, err := b.alloc.seek(key) + cursor, err := b.alloc.Seek(key) if err != nil { panic(err) } From 6890e9bd5a7dd2608041d398b097becc2a67b86d Mon Sep 17 00:00:00 2001 From: awskii Date: Thu, 9 Feb 2023 13:00:37 +0000 Subject: [PATCH 08/54] index replacement for erigon4 begun --- state/aggregator.go | 6 +-- state/aggregator_test.go | 3 ++ state/btree_index.go | 104 ++++++++++++++++++++++++++++++++++++++- state/domain.go | 15 ++++++ state/domain_test.go | 4 +- state/history.go | 5 +- state/merge.go | 18 ++++++- 7 files changed, 145 insertions(+), 10 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 4189e300f..9f5bb0c3e 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -28,9 +28,10 @@ import ( "time" "github.com/holiman/uint256" - "github.com/ledgerwatch/erigon-lib/kv/order" "github.com/ledgerwatch/log/v3" + "github.com/ledgerwatch/erigon-lib/kv/order" + "github.com/ledgerwatch/erigon-lib/commitment" "github.com/ledgerwatch/erigon-lib/common/length" "github.com/ledgerwatch/erigon-lib/kv" @@ -38,7 +39,7 @@ import ( // StepsInBiggestFile - files of this size are completely frozen/immutable. // files of smaller size are also immutable, but can be removed after merge to bigger files. 
-const StepsInBiggestFile = 32 +const StepsInBiggestFile = 16 // Reconstruction of the aggregator in another package, `aggregator` @@ -338,7 +339,6 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { return fmt.Errorf("domain collate-build failed: %w", err) } - // TODO questionable ac := a.MakeContext() defer ac.Close() diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 88a10b98e..c7194d391 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -479,6 +479,9 @@ func pivotKeysFromKV(dataPath string) ([][]byte, error) { listing := make([][]byte, 0, 1000) for getter.HasNext() { + if len(listing) > 100000 { + break + } key, _ := getter.Next(key[:0]) listing = append(listing, common.Copy(key)) getter.Skip() diff --git a/state/btree_index.go b/state/btree_index.go index 8f1247d01..a7f3a2a8d 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -825,6 +825,56 @@ type BtIndex struct { getter *compress.Getter } +func CreateBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { + err := BuildBtreeIndex(dataPath, indexPath) + if err != nil { + return nil, err + } + return OpenBtreeIndex(indexPath, dataPath, M) +} + +func BuildBtreeIndexWithDecompressor(indexPath string, kv *compress.Decompressor) error { + args := BtIndexWriterArgs{ + IndexFile: indexPath, + TmpDir: filepath.Dir(indexPath), + } + + iw, err := NewBtIndexWriter(args) + if err != nil { + return err + } + + getter := kv.MakeGetter() + getter.Reset(0) + + key := make([]byte, 0, 64) + ks := make(map[int]int) + + var pos uint64 + emptys := 0 + for getter.HasNext() { + key, kp := getter.Next(key[:0]) + err = iw.AddKey(key[:], uint64(pos)) + if err != nil { + return err + } + + pos = getter.Skip() + if pos-kp == 1 { + ks[len(key)]++ + emptys++ + } + } + kv.Close() + fmt.Printf("emptys %d %#+v\n", emptys, ks) + + if err := iw.Build(); err != nil { + return err + } + iw.Close() + return nil +} + // Opens .kv at dataPath and generates index over it to file 'indexPath' func BuildBtreeIndex(dataPath, indexPath string) error { decomp, err := compress.NewDecompressor(dataPath) @@ -871,6 +921,54 @@ func BuildBtreeIndex(dataPath, indexPath string) error { return nil } +func OpenBtreeIndexWithDecompressor(indexPath string, M uint64, kv *compress.Decompressor) (*BtIndex, error) { + s, err := os.Stat(indexPath) + if err != nil { + return nil, err + } + + idx := &BtIndex{ + filePath: indexPath, + size: s.Size(), + modTime: s.ModTime(), + auxBuf: make([]byte, 64), + } + + idx.file, err = os.Open(indexPath) + if err != nil { + return nil, err + } + + if idx.mmapUnix, idx.mmapWin, err = mmap.Mmap(idx.file, int(idx.size)); err != nil { + return nil, err + } + idx.data = idx.mmapUnix[:idx.size] + + // Read number of keys and bytes per record + pos := 8 + idx.keyCount = binary.BigEndian.Uint64(idx.data[:pos]) + //idx.baseDataID = binary.BigEndian.Uint64(idx.data[pos:8]) + idx.bytesPerRec = int(idx.data[pos]) + pos += 1 + + // offset := int(idx.keyCount) * idx.bytesPerRec //+ (idx.keySize * int(idx.keyCount)) + // if offset < 0 { + // return nil, fmt.Errorf("offset is: %d which is below zero, the file: %s is broken", offset, indexPath) + // } + + //p := (*[]byte)(unsafe.Pointer(&idx.data[pos])) + //l := int(idx.keyCount)*idx.bytesPerRec + (16 * int(idx.keyCount)) + + idx.getter = kv.MakeGetter() + + idx.alloc = newBtAlloc(idx.keyCount, M, false) + idx.alloc.dataLookup = idx.dataLookup + idx.dataoffset = uint64(pos) + idx.alloc.traverseDfs() + idx.alloc.fillSearchMx() + return 
idx, nil +} + func OpenBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { s, err := os.Stat(indexPath) if err != nil { @@ -979,8 +1077,10 @@ func (b *BtIndex) Close() error { if err := b.file.Close(); err != nil { return err } - if err := b.decompressor.Close(); err != nil { - return err + if b.decompressor != nil { + if err := b.decompressor.Close(); err != nil { + return err + } } return nil } diff --git a/state/domain.go b/state/domain.go index a9a48f092..9fa38704b 100644 --- a/state/domain.go +++ b/state/domain.go @@ -28,6 +28,7 @@ import ( "path/filepath" "regexp" "strconv" + "strings" "sync/atomic" "time" @@ -801,6 +802,7 @@ func (d *Domain) collate(ctx context.Context, step, txFrom, txTo uint64, roTx kv type StaticFiles struct { valuesDecomp *compress.Decompressor valuesIdx *recsplit.Index + valuesBt *BtIndex historyDecomp *compress.Decompressor historyIdx *recsplit.Index efHistoryDecomp *compress.Decompressor @@ -864,16 +866,28 @@ func (d *Domain) buildFiles(ctx context.Context, step uint64, collation Collatio } valuesComp.Close() valuesComp = nil + if valuesDecomp, err = compress.NewDecompressor(collation.valuesPath); err != nil { return StaticFiles{}, fmt.Errorf("open %s values decompressor: %w", d.filenameBase, err) } if valuesIdx, err = buildIndex(ctx, valuesDecomp, valuesIdxPath, d.tmpdir, collation.valuesCount, false); err != nil { return StaticFiles{}, fmt.Errorf("build %s values idx: %w", d.filenameBase, err) } + + btPath := strings.TrimSuffix(valuesIdxPath, "kvi") + ".bt" + if err := BuildBtreeIndexWithDecompressor(btPath, valuesDecomp); err != nil { + return StaticFiles{}, fmt.Errorf("build %s values bt idx: %w", d.filenameBase, err) + } + bt, err := OpenBtreeIndexWithDecompressor(btPath, 2048, valuesDecomp) + if err != nil { + return StaticFiles{}, fmt.Errorf("failed to ") + } + closeComp = false return StaticFiles{ valuesDecomp: valuesDecomp, valuesIdx: valuesIdx, + valuesBt: bt, historyDecomp: hStaticFiles.historyDecomp, historyIdx: hStaticFiles.historyIdx, efHistoryDecomp: hStaticFiles.efHistoryDecomp, @@ -977,6 +991,7 @@ func (d *Domain) integrateFiles(sf StaticFiles, txNumFrom, txNumTo uint64) { endTxNum: txNumTo, decompressor: sf.valuesDecomp, index: sf.valuesIdx, + bindex: sf.valuesBt, }) } diff --git a/state/domain_test.go b/state/domain_test.go index 807980c63..84e809016 100644 --- a/state/domain_test.go +++ b/state/domain_test.go @@ -33,7 +33,6 @@ import ( "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/erigon-lib/kv/mdbx" - "github.com/ledgerwatch/erigon-lib/recsplit" ) func testDbAndDomain(t *testing.T, prefixLen int) (string, kv.RwDB, *Domain) { @@ -64,6 +63,7 @@ func testDbAndDomain(t *testing.T, prefixLen int) (string, kv.RwDB, *Domain) { return path, db, d } +// btree index should work correctly if K < m func TestCollationBuild(t *testing.T) { logEvery := time.NewTicker(30 * time.Second) defer logEvery.Stop() @@ -115,7 +115,7 @@ func TestCollationBuild(t *testing.T) { require.Equal(t, []string{"key1", "value1.2", "key2", "value2.1"}, words) // Check index require.Equal(t, 2, int(sf.valuesIdx.KeyCount())) - r := recsplit.NewIndexReader(sf.valuesIdx) + r := sf.valuesIdx for i := 0; i < len(words); i += 2 { offset := r.Lookup([]byte(words[i])) g.Reset(offset) diff --git a/state/history.go b/state/history.go index 6299c2bfe..5ab88b01f 100644 --- a/state/history.go +++ b/state/history.go @@ -33,14 +33,15 @@ import ( "github.com/RoaringBitmap/roaring/roaring64" "github.com/google/btree" - 
"github.com/ledgerwatch/erigon-lib/common/dbg" - "github.com/ledgerwatch/erigon-lib/kv/order" "github.com/ledgerwatch/log/v3" btree2 "github.com/tidwall/btree" "golang.org/x/exp/slices" "golang.org/x/sync/errgroup" "golang.org/x/sync/semaphore" + "github.com/ledgerwatch/erigon-lib/common/dbg" + "github.com/ledgerwatch/erigon-lib/kv/order" + "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/cmp" "github.com/ledgerwatch/erigon-lib/common/dir" diff --git a/state/merge.go b/state/merge.go index 5a484338b..ae9f9fc5c 100644 --- a/state/merge.go +++ b/state/merge.go @@ -25,12 +25,13 @@ import ( "path/filepath" "strings" + "github.com/ledgerwatch/log/v3" + "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/cmp" "github.com/ledgerwatch/erigon-lib/compress" "github.com/ledgerwatch/erigon-lib/recsplit" "github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32" - "github.com/ledgerwatch/log/v3" ) func (d *Domain) endTxNumMinimax() uint64 { @@ -483,6 +484,9 @@ func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, histor if indexIn.index != nil { indexIn.index.Close() } + if indexIn.bindex != nil { + indexIn.bindex.Close() + } } if historyIn != nil { if historyIn.decompressor != nil { @@ -627,6 +631,18 @@ func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, histor if valuesIn.index, err = buildIndex(ctx, valuesIn.decompressor, idxPath, d.tmpdir, keyCount, false /* values */); err != nil { return nil, nil, nil, fmt.Errorf("merge %s buildIndex [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err) } + + btPath := strings.TrimSuffix(idxPath, "kvi") + "bt" + err = BuildBtreeIndexWithDecompressor(btPath, valuesIn.decompressor) + if err != nil { + return nil, nil, nil, fmt.Errorf("merge %s btindex [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err) + } + + bt, err := OpenBtreeIndexWithDecompressor(btPath, 2048, valuesIn.decompressor) + if err != nil { + return nil, nil, nil, fmt.Errorf("merge %s btindex2 [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err) + } + valuesIn.bindex = bt } closeItem = false d.stats.MergesCount++ From 4e5d525aa12c42d2dbdba5d58aa829977f0ce671 Mon Sep 17 00:00:00 2001 From: awskii Date: Thu, 9 Feb 2023 14:28:25 +0000 Subject: [PATCH 09/54] working version of bt index --- state/aggregator.go | 8 ++--- state/aggregator_test.go | 73 +++++----------------------------------- state/btree_index.go | 11 +++++- state/domain.go | 2 +- state/domain_test.go | 3 +- 5 files changed, 26 insertions(+), 71 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 22dcb1656..f32732d3b 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -684,10 +684,10 @@ func (a *Aggregator) cleanAfterFreeze(in MergedFiles) { a.storage.cleanAfterFreeze(in.storageHist) a.code.cleanAfterFreeze(in.codeHist) a.commitment.cleanAfterFreeze(in.commitment) - a.logAddrs.cleanAfterFreeze(in.logAddrs) - a.logTopics.cleanAfterFreeze(in.logTopics) - a.tracesFrom.cleanAfterFreeze(in.tracesFrom) - a.tracesTo.cleanAfterFreeze(in.tracesTo) + //a.logAddrs.cleanAfterFreeze(in.logAddrs) + //a.logTopics.cleanAfterFreeze(in.logTopics) + //a.tracesFrom.cleanAfterFreeze(in.tracesFrom) + //a.tracesTo.cleanAfterFreeze(in.tracesTo) } func (ac *AggregatorContext) ReadAccountData(addr []byte, roTx kv.Tx) ([]byte, error) { return ac.accounts.Get(addr, nil, roTx) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index aa3d124f1..db684c3f9 
100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -440,7 +440,7 @@ func Test_EncodeCommitmentState(t *testing.T) { func Test_BtreeIndex_Seek(t *testing.T) { tmp := t.TempDir() - keyCount, M := 1000, 16 + keyCount, M := 120, 1024 dataPath := generateCompressedKV(t, tmp, 52, 180 /*val size*/, keyCount) defer os.RemoveAll(tmp) @@ -452,13 +452,11 @@ func Test_BtreeIndex_Seek(t *testing.T) { require.NoError(t, err) require.EqualValues(t, bt.KeyCount(), keyCount) - idx := NewBtIndexReader(bt) - keys, err := pivotKeysFromKV(dataPath) require.NoError(t, err) for i := 0; i < len(keys); i++ { - cur, err := idx.Seek(keys[i]) + cur, err := bt.Seek(keys[i]) require.NoErrorf(t, err, "i=%d", i) require.EqualValues(t, keys[i], cur.key) require.NotEmptyf(t, cur.Value(), "i=%d", i) @@ -561,73 +559,20 @@ func generateCompressedKV(t testing.TB, tmp string, keySize, valueSize, keyCount func Test_InitBtreeIndex(t *testing.T) { tmp := t.TempDir() - args := BtIndexWriterArgs{ - IndexFile: path.Join(tmp, "100k.bt"), - TmpDir: tmp, - KeyCount: 12, - } - keySize := 52 - M := uint64(4) - iw, err := NewBtIndexWriter(args) - require.NoError(t, err) - - defer iw.Close() defer os.RemoveAll(tmp) - rnd := rand.New(rand.NewSource(0)) - keys := make([]byte, keySize) - values := make([]byte, 300) - - comp, err := compress.NewCompressor(context.Background(), "cmp", path.Join(tmp, "100k.v2"), tmp, compress.MinPatternScore, 1, log.LvlDebug) + keyCount, M := 100, uint64(4) + compPath := generateCompressedKV(t, tmp, 52, 300, keyCount) + decomp, err := compress.NewDecompressor(compPath) require.NoError(t, err) + defer decomp.Close() - for i := 0; i < args.KeyCount; i++ { - n, err := rnd.Read(keys[:52]) - require.EqualValues(t, n, 52) - require.NoError(t, err) - err = comp.AddWord(keys[:n]) - require.NoError(t, err) - - n, err = rnd.Read(values[:rnd.Intn(300)]) - require.NoError(t, err) - - err = comp.AddWord(values[:n]) - require.NoError(t, err) - } - - err = comp.Compress() + err = BuildBtreeIndexWithDecompressor(tmp+".bt", decomp) require.NoError(t, err) - comp.Close() - decomp, err := compress.NewDecompressor(path.Join(tmp, "100k.v2")) + bt, err := OpenBtreeIndexWithDecompressor(tmp+".bt", M, decomp) require.NoError(t, err) - - getter := decomp.MakeGetter() - getter.Reset(0) - - var pos uint64 - for i := 0; i < args.KeyCount; i++ { - if !getter.HasNext() { - t.Fatalf("not enough values at %d", i) - break - } - - keys, _ := getter.Next(keys[:0]) - err = iw.AddKey(keys[:], uint64(pos)) - - pos = getter.Skip() - require.NoError(t, err) - } - decomp.Close() - - require.NoError(t, iw.Build()) - iw.Close() - - // fixme kv is shifted by 1 - // fixme index building functions - bt, err := OpenBtreeIndex(args.IndexFile, path.Join(tmp, "100k.v2"), M) - require.NoError(t, err) - require.EqualValues(t, bt.KeyCount(), args.KeyCount) + require.EqualValues(t, bt.KeyCount(), keyCount) bt.Close() } diff --git a/state/btree_index.go b/state/btree_index.go index a7f3a2a8d..bc5713b91 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -256,6 +256,11 @@ func (a *btAlloc) traverseDfs() { a.nodes[l] = make([]node, 0) } + if len(a.cursors) == 1 { + a.nodes[0] = append(a.nodes[0], node{d: a.K}) + return + } + // TODO if keys less than half leaf size store last key to just support bsearch on these amount. 
c := a.cursors[len(a.cursors)-1] pc := a.cursors[(len(a.cursors) - 2)] @@ -445,6 +450,11 @@ func (a *btAlloc) Seek(ik []byte) (*Cursor, error) { ) for l, level := range a.nodes { + if len(level) == 1 && l == 0 { + ln = a.nodes[0][0] + maxD = ln.d + break + } ln, lm, rm = a.bsNode(uint64(l), L, R, ik) if ln.key == nil || ln.val == nil { // should return node which is nearest to key from the left so never nil L = 0 @@ -865,7 +875,6 @@ func BuildBtreeIndexWithDecompressor(indexPath string, kv *compress.Decompressor emptys++ } } - kv.Close() fmt.Printf("emptys %d %#+v\n", emptys, ks) if err := iw.Build(); err != nil { diff --git a/state/domain.go b/state/domain.go index f02ed7d84..0a273fca1 100644 --- a/state/domain.go +++ b/state/domain.go @@ -942,7 +942,7 @@ func (d *Domain) buildFiles(ctx context.Context, step uint64, collation Collatio return StaticFiles{}, fmt.Errorf("build %s values idx: %w", d.filenameBase, err) } - btPath := strings.TrimSuffix(valuesIdxPath, "kvi") + ".bt" + btPath := strings.TrimSuffix(valuesIdxPath, "kvi") + "bt" if err := BuildBtreeIndexWithDecompressor(btPath, valuesDecomp); err != nil { return StaticFiles{}, fmt.Errorf("build %s values bt idx: %w", d.filenameBase, err) } diff --git a/state/domain_test.go b/state/domain_test.go index ca3389cc1..27ec4f6e3 100644 --- a/state/domain_test.go +++ b/state/domain_test.go @@ -33,6 +33,7 @@ import ( "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/erigon-lib/kv/mdbx" + "github.com/ledgerwatch/erigon-lib/recsplit" ) func testDbAndDomain(t *testing.T, prefixLen int) (string, kv.RwDB, *Domain) { @@ -115,7 +116,7 @@ func TestCollationBuild(t *testing.T) { require.Equal(t, []string{"key1", "value1.2", "key2", "value2.1"}, words) // Check index require.Equal(t, 2, int(sf.valuesIdx.KeyCount())) - r := sf.valuesIdx + r := recsplit.NewIndexReader(sf.valuesIdx) for i := 0; i < len(words); i += 2 { offset := r.Lookup([]byte(words[i])) g.Reset(offset) From 5e712d1c45002f142878711264c1cb8ebde5e480 Mon Sep 17 00:00:00 2001 From: awskii Date: Thu, 9 Feb 2023 21:02:46 +0000 Subject: [PATCH 10/54] introducing btindex to domains --- state/aggregator.go | 6 ++ state/aggregator_test.go | 60 +++++++++++++++++++- state/btree_index.go | 22 ++++++-- state/domain.go | 113 ++++++++++++++++++++++++-------------- state/domain_committed.go | 49 ++++++++++++----- state/merge.go | 6 ++ 6 files changed, 194 insertions(+), 62 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index f32732d3b..059352c37 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -532,6 +532,9 @@ func (sf SelectedStaticFiles) Close() { if item.index != nil { item.index.Close() } + if item.bindex != nil { + item.bindex.Close() + } } } } @@ -584,6 +587,9 @@ func (mf MergedFiles) Close() { if item.decompressor != nil { item.index.Close() } + if item.bindex != nil { + item.bindex.Close() + } } } } diff --git a/state/aggregator_test.go b/state/aggregator_test.go index db684c3f9..1cf5dbae0 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -22,6 +22,7 @@ import ( "github.com/ledgerwatch/erigon-lib/compress" "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/erigon-lib/kv/mdbx" + "github.com/ledgerwatch/erigon-lib/recsplit" ) func testDbAndAggregator(t *testing.T, prefixLen int, aggStep uint64) (string, kv.RwDB, *Aggregator) { @@ -654,7 +655,7 @@ func Test_btree_Seek(t *testing.T) { prevKey = nk } if i%1000 == 0 { - fmt.Printf("%d searches, last took %v total seek time %v avg=%v next_access_last[%d] %v\n", i, took, 
tsum, tsum/time.Duration(i), j, ntimer/time.Duration(j)) + fmt.Printf("%d searches, last took %v avg=%v next_access_last[of %d keys] %v\n", i, took, tsum/time.Duration(i), j, ntimer/time.Duration(j)) } } @@ -663,3 +664,60 @@ func Test_btree_Seek(t *testing.T) { bt.Close() } + +func Test_Recsplit_Find(t *testing.T) { + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + tmp := t.TempDir() + + defer os.RemoveAll(tmp) + dir, _ := os.Getwd() + fmt.Printf("path %s\n", dir) + dataPath := "../../data/storage.256-288.kv" + indexPath := dataPath + "i" + + idx, err := recsplit.OpenIndex(indexPath) + require.NoError(t, err) + idxr := recsplit.NewIndexReader(idx) + + decomp, err := compress.NewDecompressor(dataPath) + require.NoError(t, err) + defer decomp.Close() + + getter := decomp.MakeGetter() + + keys, err := pivotKeysFromKV(dataPath) + require.NoError(t, err) + + tsum := time.Duration(0) + + var i int + for i = 1; i < 10000000; i++ { + p := rnd.Intn(len(keys)) + cl := time.Now() + offset := idxr.Lookup(keys[p]) + getter.Reset(offset) + + require.True(t, getter.HasNext()) + + key, pa := getter.Next(nil) + require.NotEmpty(t, key) + + value, pb := getter.Next(nil) + if pb-pa != 1 { + require.NotEmpty(t, value) + } + + took := time.Since(cl) + tsum += took + + require.NoErrorf(t, err, "i=%d", i) + require.EqualValues(t, keys[p], key) + + if i%1000 == 0 { + fmt.Printf("%d searches, last took %v avg=%v\n", i, took, tsum/time.Duration(i)) + } + + } + avg := tsum / (1000000 - 1) + fmt.Printf("avg seek time %v\n", avg) +} diff --git a/state/btree_index.go b/state/btree_index.go index bc5713b91..c46857611 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -78,6 +78,10 @@ func (c *Cursor) Key() []byte { return c.key } +func (c *Cursor) Ordinal() uint64 { + return c.d +} + func (c *Cursor) Value() []byte { return c.value } @@ -457,11 +461,10 @@ func (a *btAlloc) Seek(ik []byte) (*Cursor, error) { } ln, lm, rm = a.bsNode(uint64(l), L, R, ik) if ln.key == nil || ln.val == nil { // should return node which is nearest to key from the left so never nil - L = 0 if a.trace { fmt.Printf("found nil key %x pos_range[%d-%d] naccess_ram=%d\n", l, lm, rm, a.naccess) } - panic(fmt.Errorf("nil node at %d", l)) + panic(fmt.Errorf("bt index nil node at level %d", l)) } switch bytes.Compare(ln.key, ik) { @@ -1114,9 +1117,18 @@ func (b *BtIndex) Lookup(key []byte) uint64 { return binary.BigEndian.Uint64(cursor.value) } -func (b *BtIndex) OrdinalLookup(i uint64) uint64 { - //TODO implement me - panic("implement me") +func (b *BtIndex) OrdinalLookup(i uint64) *Cursor { + if i > b.alloc.K { + return nil + } + k, v, err := b.dataLookup(i) + if err != nil { + return nil + } + + return &Cursor{ + key: k, value: v, d: i, ix: b.alloc, + } } func (b *BtIndex) ExtractOffsets() map[uint64]uint64 { diff --git a/state/domain.go b/state/domain.go index 0a273fca1..dce10b6ff 100644 --- a/state/domain.go +++ b/state/domain.go @@ -100,6 +100,15 @@ func (i *filesItem) closeFilesAndRemove() { } i.index = nil } + if i.bindex != nil { + if err := i.bindex.Close(); err != nil { + log.Trace("close", "err", err, "file", i.bindex.FileName()) + } + if err := os.Remove(i.bindex.FilePath()); err != nil { + log.Trace("close", "err", err, "file", i.bindex.FileName()) + } + i.bindex = nil + } } type DomainStats struct { @@ -282,6 +291,14 @@ func (d *Domain) openFiles() error { totalKeys += item.index.KeyCount() } } + if item.bindex == nil { + bidxPath := filepath.Join(d.dir, fmt.Sprintf("%s.%d-%d.bt", d.filenameBase, fromStep, toStep)) + 
if item.bindex, err = OpenBtreeIndexWithDecompressor(bidxPath, 2048, item.decompressor); err != nil { + log.Debug("InvertedIndex.openFiles: %w, %s", err, bidxPath) + return false + } + //totalKeys += item.bindex.KeyCount() + } } return true }) @@ -309,6 +326,12 @@ func (d *Domain) closeFiles() { } item.index = nil } + if item.bindex != nil { + if err := item.bindex.Close(); err != nil { + log.Trace("close", "err", err, "file", item.bindex.FileName()) + } + item.bindex = nil + } } return true }) @@ -552,6 +575,7 @@ type DomainContext struct { d *Domain files []ctxItem getters []*compress.Getter + bts []*BtIndex readers []*recsplit.IndexReader hc *HistoryContext keyBuf [60]byte // 52b key and 8b for inverted step @@ -569,6 +593,19 @@ func (dc *DomainContext) statelessGetter(i int) *compress.Getter { } return r } + +func (dc *DomainContext) statelessBtree(i int) *BtIndex { + if dc.bts == nil { + dc.bts = make([]*BtIndex, len(dc.files)) + } + r := dc.bts[i] + if r == nil { + r = dc.files[i].src.bindex + dc.bts[i] = r + } + return r +} + func (dc *DomainContext) statelessIdxReader(i int) *recsplit.IndexReader { if dc.readers == nil { dc.readers = make([]*recsplit.IndexReader, len(dc.files)) @@ -600,7 +637,8 @@ func (d *Domain) collectFilesStats() (datsz, idxsz, files uint64) { } datsz += uint64(item.decompressor.Size()) idxsz += uint64(item.index.Size()) - files += 2 + idxsz += uint64(item.bindex.Size()) + files += 3 } return true }) @@ -676,26 +714,21 @@ func (dc *DomainContext) IteratePrefix(prefix []byte, it func(k, v []byte)) erro heap.Push(&cp, &CursorItem{t: DB_CURSOR, key: common.Copy(k), val: common.Copy(v), c: keysCursor, endTxNum: txNum, reverse: true}) } for i, item := range dc.files { - reader := dc.statelessIdxReader(i) - if reader.Empty() { + bg := dc.statelessBtree(i) + if bg.Empty() { continue } - offset := reader.Lookup(prefix) - // Creating dedicated getter because the one in the item may be used to delete storage, for example - g := dc.statelessGetter(i) - g.Reset(offset) - if g.HasNext() { - if keyMatch, _ := g.Match(prefix); !keyMatch { - continue - } - g.Skip() + + cursor, err := bg.Seek(prefix) + if err != nil { + panic(err) } - if g.HasNext() { - key, _ := g.Next(nil) - if bytes.HasPrefix(key, prefix) { - val, _ := g.Next(nil) - heap.Push(&cp, &CursorItem{t: FILE_CURSOR, key: key, val: val, dg: g, endTxNum: item.endTxNum, reverse: true}) - } + + g := dc.statelessGetter(i) + key := cursor.Key() + if bytes.HasPrefix(key, prefix) { + val := cursor.Value() + heap.Push(&cp, &CursorItem{t: FILE_CURSOR, key: key, val: val, dg: g, endTxNum: item.endTxNum, reverse: true}) } } for cp.Len() > 0 { @@ -1210,19 +1243,20 @@ func (dc *DomainContext) readFromFiles(filekey []byte, fromTxNum uint64) ([]byte if dc.files[i].endTxNum < fromTxNum { break } - reader := dc.statelessIdxReader(i) + reader := dc.statelessBtree(i) if reader.Empty() { continue } - offset := reader.Lookup(filekey) - g := dc.statelessGetter(i) - g.Reset(offset) - if g.HasNext() { - if keyMatch, _ := g.Match(filekey); keyMatch { - val, _ = g.Next(nil) - found = true - break - } + cur, err := reader.Seek(filekey) + if err != nil { + log.Warn("failed to read from file", "key", filekey, "err", err) + continue + } + + if bytes.Equal(cur.Key(), filekey) { + val = cur.Value() + found = true + break } } return val, found @@ -1280,22 +1314,19 @@ func (dc *DomainContext) historyBeforeTxNum(key []byte, txNum uint64, roTx kv.Tx if dc.files[i].startTxNum > topState.startTxNum { continue } - reader := dc.statelessIdxReader(i) + 
reader := dc.statelessBtree(i) if reader.Empty() { continue } - offset := reader.Lookup(key) - g := dc.statelessGetter(i) - g.Reset(offset) - if g.HasNext() { - if k, _ := g.NextUncompressed(); bytes.Equal(k, key) { - if dc.d.compressVals { - val, _ = g.Next(nil) - } else { - val, _ = g.NextUncompressed() - } - break - } + cur, err := reader.Seek(key) + if err != nil { + log.Warn("failed to read history before from file", "key", key, "err", err) + continue + } + + if bytes.Equal(cur.Key(), key) { + val = cur.Value() + break } } return val, true, nil diff --git a/state/domain_committed.go b/state/domain_committed.go index d3afb6395..cd4b082cd 100644 --- a/state/domain_committed.go +++ b/state/domain_committed.go @@ -24,6 +24,7 @@ import ( "fmt" "hash" "path/filepath" + "strings" "github.com/google/btree" "github.com/ledgerwatch/log/v3" @@ -33,7 +34,6 @@ import ( "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/length" "github.com/ledgerwatch/erigon-lib/compress" - "github.com/ledgerwatch/erigon-lib/recsplit" ) // Defines how to evaluate commitments @@ -213,26 +213,24 @@ func (d *DomainCommitted) replaceKeyWithReference(fullKey, shortKey []byte, type numBuf := [2]byte{} var found bool for _, item := range list { - g := item.decompressor.MakeGetter() - index := recsplit.NewIndexReader(item.index) + //g := item.decompressor.MakeGetter() + //index := recsplit.NewIndexReader(item.index) - offset := index.Lookup(fullKey) - g.Reset(offset) - if !g.HasNext() { + cur, err := item.bindex.Seek(fullKey) + if err != nil { + log.Warn("bt index seek failed", "err", err) continue } - if keyMatch, _ := g.Match(fullKey); keyMatch { - step := uint16(item.endTxNum / d.aggregationStep) - binary.BigEndian.PutUint16(numBuf[:], step) + step := uint16(item.endTxNum / d.aggregationStep) + binary.BigEndian.PutUint16(numBuf[:], step) - shortKey = encodeU64(offset, numBuf[:]) + shortKey = encodeU64(cur.Ordinal(), numBuf[:]) - if d.trace { - fmt.Printf("replacing %s [%x] => {%x} [step=%d, offset=%d, file=%s.%d-%d]\n", typeAS, fullKey, shortKey, step, offset, typeAS, item.startTxNum, item.endTxNum) - } - found = true - break + if d.trace { + fmt.Printf("replacing %s [%x] => {%x} [step=%d, offset=%d, file=%s.%d-%d]\n", typeAS, fullKey, shortKey, step, cur.Ordinal(), typeAS, item.startTxNum, item.endTxNum) } + found = true + break } return found } @@ -345,6 +343,9 @@ func (d *DomainCommitted) mergeFiles(ctx context.Context, oldFiles SelectedStati if indexIn.index != nil { indexIn.index.Close() } + if indexIn.bindex != nil { + indexIn.bindex.Close() + } } if historyIn != nil { if historyIn.decompressor != nil { @@ -353,6 +354,9 @@ func (d *DomainCommitted) mergeFiles(ctx context.Context, oldFiles SelectedStati if historyIn.index != nil { historyIn.index.Close() } + if historyIn.bindex != nil { + historyIn.bindex.Close() + } } if valuesIn != nil { if valuesIn.decompressor != nil { @@ -361,6 +365,9 @@ func (d *DomainCommitted) mergeFiles(ctx context.Context, oldFiles SelectedStati if valuesIn.index != nil { valuesIn.index.Close() } + if valuesIn.bindex != nil { + valuesIn.bindex.Close() + } } } }() @@ -499,6 +506,18 @@ func (d *DomainCommitted) mergeFiles(ctx context.Context, oldFiles SelectedStati if valuesIn.index, err = buildIndex(ctx, valuesIn.decompressor, idxPath, d.dir, keyCount, false /* values */); err != nil { return nil, nil, nil, fmt.Errorf("merge %s buildIndex [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err) } + + btPath := strings.TrimSuffix(idxPath, 
"kvi") + "bt" + err = BuildBtreeIndexWithDecompressor(btPath, valuesIn.decompressor) + if err != nil { + return nil, nil, nil, fmt.Errorf("merge %s btindex [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err) + } + + bt, err := OpenBtreeIndexWithDecompressor(btPath, 2048, valuesIn.decompressor) + if err != nil { + return nil, nil, nil, fmt.Errorf("merge %s btindex2 [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err) + } + valuesIn.bindex = bt } closeItem = false d.stats.MergesCount++ diff --git a/state/merge.go b/state/merge.go index 371b38b36..dec4c1297 100644 --- a/state/merge.go +++ b/state/merge.go @@ -187,6 +187,9 @@ func (s *staticFilesInRange) Close() { if item.index != nil { item.index.Close() } + if item.bindex != nil { + item.bindex.Close() + } } } } @@ -576,6 +579,9 @@ func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, histor if valuesIn.index != nil { valuesIn.index.Close() } + if valuesIn.bindex != nil { + valuesIn.bindex.Close() + } } } }() From 3990a4ff40537bcbaf6a3dcd467af8a1e5f7dbd9 Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 10 Feb 2023 14:20:51 +0000 Subject: [PATCH 11/54] small alignments for e4 running --- state/btree_index.go | 30 ++++++++++++++++++++++-------- state/domain.go | 4 ++-- state/domain_committed.go | 22 ++++++---------------- state/merge.go | 5 +++-- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/state/btree_index.go b/state/btree_index.go index c46857611..28f313471 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -109,7 +109,6 @@ type btAlloc struct { sons [][]uint64 // i - level; 0 <= i < d; j_k - amount, j_k+1 - child count cursors []markupCursor nodes [][]node - data []uint64 naccess uint64 trace bool @@ -117,13 +116,16 @@ type btAlloc struct { } func newBtAlloc(k, M uint64, trace bool) *btAlloc { + if k == 0 { + return nil + } + d := logBase(k, M) a := &btAlloc{ vx: make([]uint64, d+1), sons: make([][]uint64, d+1), cursors: make([]markupCursor, d), nodes: make([][]node, d), - data: make([]uint64, k), M: M, K: k, d: d, @@ -134,6 +136,12 @@ func newBtAlloc(k, M uint64, trace bool) *btAlloc { } a.vx[0], a.vx[d] = 1, k + if k < M/2 { + a.N = k + a.nodes = make([][]node, 1) + return a + } + nnc := func(vx uint64) uint64 { return uint64(math.Ceil(float64(vx) / float64(M))) } @@ -253,14 +261,17 @@ func (a *btAlloc) traverseTrick() { func (a *btAlloc) traverseDfs() { for l := 0; l < len(a.sons)-1; l++ { - if len(a.sons[l]) < 2 { - panic("invalid btree allocation markup") - } + //if len(a.sons[l]) < 2 { + // panic("invalid btree allocation markup") + //} a.cursors[l] = markupCursor{uint64(l), 1, 0, 0} a.nodes[l] = make([]node, 0) } - if len(a.cursors) == 1 { + if len(a.cursors) <= 1 { + if a.nodes[0] == nil { + a.nodes[0] = make([]node, 0) + } a.nodes[0] = append(a.nodes[0], node{d: a.K}) return } @@ -460,7 +471,7 @@ func (a *btAlloc) Seek(ik []byte) (*Cursor, error) { break } ln, lm, rm = a.bsNode(uint64(l), L, R, ik) - if ln.key == nil || ln.val == nil { // should return node which is nearest to key from the left so never nil + if ln.key == nil { // should return node which is nearest to key from the left so never nil if a.trace { fmt.Printf("found nil key %x pos_range[%d-%d] naccess_ram=%d\n", l, lm, rm, a.naccess) } @@ -959,10 +970,13 @@ func OpenBtreeIndexWithDecompressor(indexPath string, M uint64, kv *compress.Dec // Read number of keys and bytes per record pos := 8 idx.keyCount = binary.BigEndian.Uint64(idx.data[:pos]) - //idx.baseDataID = 
binary.BigEndian.Uint64(idx.data[pos:8]) + if idx.keyCount == 0 { + return idx, nil + } idx.bytesPerRec = int(idx.data[pos]) pos += 1 + // idx.baseDataID = binary.BigEndian.Uint64(idx.data[pos:8]) // offset := int(idx.keyCount) * idx.bytesPerRec //+ (idx.keySize * int(idx.keyCount)) // if offset < 0 { // return nil, fmt.Errorf("offset is: %d which is below zero, the file: %s is broken", offset, indexPath) diff --git a/state/domain.go b/state/domain.go index dce10b6ff..88dfcd15c 100644 --- a/state/domain.go +++ b/state/domain.go @@ -721,7 +721,7 @@ func (dc *DomainContext) IteratePrefix(prefix []byte, it func(k, v []byte)) erro cursor, err := bg.Seek(prefix) if err != nil { - panic(err) + continue } g := dc.statelessGetter(i) @@ -1249,7 +1249,7 @@ func (dc *DomainContext) readFromFiles(filekey []byte, fromTxNum uint64) ([]byte } cur, err := reader.Seek(filekey) if err != nil { - log.Warn("failed to read from file", "key", filekey, "err", err) + log.Warn("failed to read from file", "file", reader.FileName(), "err", err) continue } diff --git a/state/domain_committed.go b/state/domain_committed.go index cd4b082cd..f5797284d 100644 --- a/state/domain_committed.go +++ b/state/domain_committed.go @@ -218,7 +218,6 @@ func (d *DomainCommitted) replaceKeyWithReference(fullKey, shortKey []byte, type cur, err := item.bindex.Seek(fullKey) if err != nil { - log.Warn("bt index seek failed", "err", err) continue } step := uint16(item.endTxNum / d.aggregationStep) @@ -232,33 +231,24 @@ func (d *DomainCommitted) replaceKeyWithReference(fullKey, shortKey []byte, type found = true break } + //if !found { + // log.Warn("bt index key replacement seek failed", "key", fmt.Sprintf("%x", fullKey)) + //} return found } func (d *DomainCommitted) lookupShortenedKey(shortKey, fullKey []byte, typAS string, list []*filesItem) bool { fileStep, offset := shortenedKey(shortKey) expected := uint64(fileStep) * d.aggregationStep - var size uint64 - switch typAS { - case "account": - size = length.Addr - case "storage": - size = length.Addr + length.Hash - default: - return false - } var found bool for _, item := range list { if item.startTxNum > expected || item.endTxNum < expected { continue } - g := item.decompressor.MakeGetter() - if uint64(g.Size()) <= offset+size { - continue - } - g.Reset(offset) - fullKey, _ = g.Next(fullKey[:0]) + + cur := item.bindex.OrdinalLookup(offset) + fullKey = cur.Key() if d.trace { fmt.Printf("offsetToKey %s [%x]=>{%x} step=%d offset=%d, file=%s.%d-%d.kv\n", typAS, fullKey, shortKey, fileStep, offset, typAS, item.startTxNum, item.endTxNum) } diff --git a/state/merge.go b/state/merge.go index dec4c1297..29ca513c0 100644 --- a/state/merge.go +++ b/state/merge.go @@ -24,6 +24,7 @@ import ( "fmt" "path/filepath" "strings" + "time" "github.com/ledgerwatch/log/v3" @@ -217,7 +218,7 @@ func (d *Domain) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, } }() - //defer func(t time.Time) { log.Info("[snapshots] merge", "took", time.Since(t)) }(time.Now()) + defer func(t time.Time) { log.Info("[snapshots] merge", "took", time.Since(t)) }(time.Now()) d.integrateMergedFiles(sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, mf.values, mf.index, mf.history) // if err := d.deleteFiles(sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles); err != nil { @@ -1066,7 +1067,7 @@ func (d *Domain) integrateMergedFiles(valuesOuts, indexOuts, historyOuts []*file // `kill -9` may leave some garbage // but it still may be useful for merges, until we finish merge frozen file - if historyIn.frozen { + if 
historyIn != nil && historyIn.frozen { d.files.Walk(func(items []*filesItem) bool { for _, item := range items { if item.frozen || item.endTxNum > valuesIn.endTxNum { From 4e6a22717c6d56bed2e064ed1e02cff09833f0ec Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 10 Feb 2023 17:17:12 +0000 Subject: [PATCH 12/54] make seek find key >= x, add test and deprecated prefix copying in domain files --- state/aggregator_test.go | 27 ++++++++++++----- state/btree_index.go | 15 +++++----- state/domain.go | 30 +++++++++---------- state/domain_committed.go | 62 +++++++++++++++++++-------------------- state/merge.go | 44 +++++++++++++-------------- 5 files changed, 95 insertions(+), 83 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 1cf5dbae0..e7f387624 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -441,7 +441,7 @@ func Test_EncodeCommitmentState(t *testing.T) { func Test_BtreeIndex_Seek(t *testing.T) { tmp := t.TempDir() - keyCount, M := 120, 1024 + keyCount, M := 120000, 1024 dataPath := generateCompressedKV(t, tmp, 52, 180 /*val size*/, keyCount) defer os.RemoveAll(tmp) @@ -463,6 +463,18 @@ func Test_BtreeIndex_Seek(t *testing.T) { require.NotEmptyf(t, cur.Value(), "i=%d", i) // require.EqualValues(t, uint64(i), cur.Value()) } + for i := 1; i < len(keys); i++ { + alt := common.Copy(keys[i]) + for j := len(alt) - 1; j >= 0; j-- { + if alt[j] > 0 { + alt[j] -= 1 + break + } + } + cur, err := bt.Seek(keys[i]) + require.NoError(t, err) + require.EqualValues(t, keys[i], cur.Key()) + } bt.Close() } @@ -495,7 +507,7 @@ func pivotKeysFromKV(dataPath string) ([][]byte, error) { func generateCompressedKV(t testing.TB, tmp string, keySize, valueSize, keyCount int) string { args := BtIndexWriterArgs{ - IndexFile: path.Join(tmp, "100k.bt"), + IndexFile: path.Join(tmp, fmt.Sprintf("%dk.bt", keyCount/1000)), TmpDir: tmp, KeyCount: 12, } @@ -507,19 +519,20 @@ func generateCompressedKV(t testing.TB, tmp string, keySize, valueSize, keyCount rnd := rand.New(rand.NewSource(0)) values := make([]byte, valueSize) - comp, err := compress.NewCompressor(context.Background(), "cmp", path.Join(tmp, "100k.v2"), tmp, compress.MinPatternScore, 1, log.LvlDebug) + dataPath := path.Join(tmp, fmt.Sprintf("%dk.kv", keyCount/1000)) + comp, err := compress.NewCompressor(context.Background(), "cmp", dataPath, tmp, compress.MinPatternScore, 1, log.LvlDebug) require.NoError(t, err) for i := 0; i < keyCount; i++ { - // n, err := rnd.Read(keys[:52]) - // require.EqualValues(t, n, 52) key := make([]byte, keySize) + n, err := rnd.Read(key[:]) + require.EqualValues(t, keySize, n) binary.BigEndian.PutUint64(key[keySize-8:], uint64(i)) require.NoError(t, err) err = comp.AddWord(key[:]) require.NoError(t, err) - n, err := rnd.Read(values[:rnd.Intn(valueSize)+1]) + n, err = rnd.Read(values[:rnd.Intn(valueSize)+1]) require.NoError(t, err) err = comp.AddWord(values[:n]) @@ -530,7 +543,7 @@ func generateCompressedKV(t testing.TB, tmp string, keySize, valueSize, keyCount require.NoError(t, err) comp.Close() - decomp, err := compress.NewDecompressor(path.Join(tmp, "100k.v2")) + decomp, err := compress.NewDecompressor(dataPath) require.NoError(t, err) getter := decomp.MakeGetter() diff --git a/state/btree_index.go b/state/btree_index.go index 28f313471..34e14bc31 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -413,7 +413,11 @@ func (a *btAlloc) bsKey(x []byte, l, r uint64) (*Cursor, error) { break } } - return nil, fmt.Errorf("key %x was not found", x) + k, v, err := 
a.dataLookup(l) + if err != nil { + return nil, fmt.Errorf("key >= %x was not found at pos %d", x, l) + } + return a.newCursor(context.TODO(), k, v, l), nil } func (a *btAlloc) bsNode(i, l, r uint64, x []byte) (n node, lm int64, rm int64) { @@ -921,21 +925,16 @@ func BuildBtreeIndex(dataPath, indexPath string) error { key := make([]byte, 0, 64) var pos uint64 - emptys := 0 for getter.HasNext() { - key, kp := getter.Next(key[:0]) + key, _ := getter.Next(key[:0]) err = iw.AddKey(key[:], uint64(pos)) if err != nil { return err } pos = getter.Skip() - if pos-kp == 1 { - emptys++ - } } decomp.Close() - fmt.Printf("emptys %d\n", emptys) if err := iw.Build(); err != nil { return err @@ -1049,7 +1048,7 @@ func OpenBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { } func (b *BtIndex) dataLookup(di uint64) ([]byte, []byte, error) { - if b.keyCount <= di { + if b.keyCount < di { return nil, nil, fmt.Errorf("ki is greater than key count in index") } diff --git a/state/domain.go b/state/domain.go index 88dfcd15c..cf584f728 100644 --- a/state/domain.go +++ b/state/domain.go @@ -166,7 +166,7 @@ func NewDomain( d := &Domain{ keysTable: keysTable, valsTable: valsTable, - prefixLen: prefixLen, + //prefixLen: prefixLen, files: btree2.NewBTreeGOptions[*filesItem](filesItemLess, btree2.Options{Degree: 128, NoLocks: false}), roFiles: *atomic2.NewPointer(&[]ctxItem{}), } @@ -685,9 +685,9 @@ func (dc *DomainContext) Close() { // inside the domain. Another version of this for public API use needs to be created, that uses // roTx instead and supports ending the iterations before it reaches the end. func (dc *DomainContext) IteratePrefix(prefix []byte, it func(k, v []byte)) error { - if len(prefix) != dc.d.prefixLen { - return fmt.Errorf("wrong prefix length, this %s domain supports prefixLen %d, given [%x]", dc.d.filenameBase, dc.d.prefixLen, prefix) - } + //if len(prefix) != dc.d.prefixLen { + // return fmt.Errorf("wrong prefix length, this %s domain supports prefixLen %d, given [%x]", dc.d.filenameBase, dc.d.prefixLen, prefix) + //} atomic.AddUint64(&dc.d.stats.HistoryQueries, 1) var cp CursorHeap @@ -831,7 +831,7 @@ func (d *Domain) collate(ctx context.Context, step, txFrom, txTo uint64, roTx kv defer keysCursor.Close() var ( - prefix []byte // Track prefix to insert it before entries + //prefix []byte // Track prefix to insert it before entries k, v []byte pos uint64 valuesCount uint @@ -866,16 +866,16 @@ func (d *Domain) collate(ctx context.Context, step, txFrom, txTo uint64, roTx kv if err != nil { return Collation{}, fmt.Errorf("find last %s value for aggregation step k=[%x]: %w", d.filenameBase, k, err) } - if d.prefixLen > 0 && (prefix == nil || !bytes.HasPrefix(k, prefix)) { - prefix = append(prefix[:0], k[:d.prefixLen]...) - if err = valuesComp.AddUncompressedWord(prefix); err != nil { - return Collation{}, fmt.Errorf("add %s values prefix [%x]: %w", d.filenameBase, prefix, err) - } - if err = valuesComp.AddUncompressedWord(nil); err != nil { - return Collation{}, fmt.Errorf("add %s values prefix val [%x]: %w", d.filenameBase, prefix, err) - } - valuesCount++ - } + //if d.prefixLen > 0 && (prefix == nil || !bytes.HasPrefix(k, prefix)) { + // prefix = append(prefix[:0], k[:d.prefixLen]...) 
+ // if err = valuesComp.AddUncompressedWord(prefix); err != nil { + // return Collation{}, fmt.Errorf("add %s values prefix [%x]: %w", d.filenameBase, prefix, err) + // } + // if err = valuesComp.AddUncompressedWord(nil); err != nil { + // return Collation{}, fmt.Errorf("add %s values prefix val [%x]: %w", d.filenameBase, prefix, err) + // } + // valuesCount++ + //} if err = valuesComp.AddUncompressedWord(k); err != nil { return Collation{}, fmt.Errorf("add %s values key [%x]: %w", d.filenameBase, k, err) } diff --git a/state/domain_committed.go b/state/domain_committed.go index f5797284d..8304e46f4 100644 --- a/state/domain_committed.go +++ b/state/domain_committed.go @@ -427,38 +427,38 @@ func (d *DomainCommitted) mergeFiles(ctx context.Context, oldFiles SelectedStati heap.Pop(&cp) } } - var skip bool - if d.prefixLen > 0 { - skip = r.valuesStartTxNum == 0 && len(lastVal) == 0 && len(lastKey) != d.prefixLen - } else { - // For the rest of types, empty value means deletion - skip = r.valuesStartTxNum == 0 && len(lastVal) == 0 - } + //var skip bool + //if d.prefixLen > 0 { + // skip = r.valuesStartTxNum == 0 && len(lastVal) == 0 && len(lastKey) != d.prefixLen + //} else { + // For the rest of types, empty value means deletion + skip := r.valuesStartTxNum == 0 && len(lastVal) == 0 + //} if !skip { - if keyBuf != nil && (d.prefixLen == 0 || len(keyBuf) != d.prefixLen || bytes.HasPrefix(lastKey, keyBuf)) { - if err = comp.AddUncompressedWord(keyBuf); err != nil { - return nil, nil, nil, err - } - keyCount++ // Only counting keys, not values - - if d.trace { - fmt.Printf("merge: multi-way key %x, total keys %d\n", keyBuf, keyCount) - } - - valBuf, err = d.commitmentValTransform(&oldFiles, &mergedFiles, valBuf) - if err != nil { - return nil, nil, nil, fmt.Errorf("merge: valTransform [%x] %w", valBuf, err) - } - if d.compressVals { - if err = comp.AddWord(valBuf); err != nil { - return nil, nil, nil, err - } - } else { - if err = comp.AddUncompressedWord(valBuf); err != nil { - return nil, nil, nil, err - } - } - } + //if keyBuf != nil && (d.prefixLen == 0 || len(keyBuf) != d.prefixLen || bytes.HasPrefix(lastKey, keyBuf)) { + // if err = comp.AddUncompressedWord(keyBuf); err != nil { + // return nil, nil, nil, err + // } + // keyCount++ // Only counting keys, not values + // + // if d.trace { + // fmt.Printf("merge: multi-way key %x, total keys %d\n", keyBuf, keyCount) + // } + // + // valBuf, err = d.commitmentValTransform(&oldFiles, &mergedFiles, valBuf) + // if err != nil { + // return nil, nil, nil, fmt.Errorf("merge: valTransform [%x] %w", valBuf, err) + // } + // if d.compressVals { + // if err = comp.AddWord(valBuf); err != nil { + // return nil, nil, nil, err + // } + // } else { + // if err = comp.AddUncompressedWord(valBuf); err != nil { + // return nil, nil, nil, err + // } + // } + //} keyBuf = append(keyBuf[:0], lastKey...) valBuf = append(valBuf[:0], lastVal...) 
} diff --git a/state/merge.go b/state/merge.go index 29ca513c0..41b0e738e 100644 --- a/state/merge.go +++ b/state/merge.go @@ -654,29 +654,29 @@ func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, histor heap.Pop(&cp) } } - var skip bool - if d.prefixLen > 0 { - skip = r.valuesStartTxNum == 0 && len(lastVal) == 0 && len(lastKey) != d.prefixLen - } else { - // For the rest of types, empty value means deletion - skip = r.valuesStartTxNum == 0 && len(lastVal) == 0 - } + //var skip bool + //if d.prefixLen > 0 { + // skip = r.valuesStartTxNum == 0 && len(lastVal) == 0 && len(lastKey) != d.prefixLen + //} else { + // For the rest of types, empty value means deletion + skip := r.valuesStartTxNum == 0 && len(lastVal) == 0 + //} if !skip { - if keyBuf != nil && (d.prefixLen == 0 || len(keyBuf) != d.prefixLen || bytes.HasPrefix(lastKey, keyBuf)) { - if err = comp.AddUncompressedWord(keyBuf); err != nil { - return nil, nil, nil, err - } - keyCount++ // Only counting keys, not values - if d.compressVals { - if err = comp.AddWord(valBuf); err != nil { - return nil, nil, nil, err - } - } else { - if err = comp.AddUncompressedWord(valBuf); err != nil { - return nil, nil, nil, err - } - } - } + //if keyBuf != nil && (d.prefixLen == 0 || len(keyBuf) != d.prefixLen || bytes.HasPrefix(lastKey, keyBuf)) { + // if err = comp.AddUncompressedWord(keyBuf); err != nil { + // return nil, nil, nil, err + // } + // keyCount++ // Only counting keys, not values + // if d.compressVals { + // if err = comp.AddWord(valBuf); err != nil { + // return nil, nil, nil, err + // } + // } else { + // if err = comp.AddUncompressedWord(valBuf); err != nil { + // return nil, nil, nil, err + // } + // } + //} keyBuf = append(keyBuf[:0], lastKey...) valBuf = append(valBuf[:0], lastVal...) } From f98c1af05d3189d939b0c04f4851634210804b4e Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 15 Feb 2023 16:49:29 +0000 Subject: [PATCH 13/54] split collate --- state/aggregator.go | 2 +- state/btree_index.go | 3 +- state/domain.go | 129 +++++++++++++++++++++++++++++++++++++++++++ state/merge.go | 22 +------- 4 files changed, 132 insertions(+), 24 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 059352c37..1d1a1c9a9 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -292,7 +292,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { for i, d := range []*Domain{a.accounts, a.storage, a.code, a.commitment.Domain} { wg.Add(1) - collation, err := d.collate(ctx, step, txFrom, txTo, d.tx, logEvery) + collation, err := d.collateStream(ctx, step, txFrom, txTo, d.tx, logEvery) if err != nil { collation.Close() return fmt.Errorf("domain collation %q has failed: %w", d.filenameBase, err) diff --git a/state/btree_index.go b/state/btree_index.go index 34e14bc31..3a64fe9f3 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -276,7 +276,6 @@ func (a *btAlloc) traverseDfs() { return } - // TODO if keys less than half leaf size store last key to just support bsearch on these amount. 
c := a.cursors[len(a.cursors)-1] pc := a.cursors[(len(a.cursors) - 2)] root := new(node) @@ -651,7 +650,7 @@ func NewBtIndexWriter(args BtIndexWriterArgs) (*BtIndexWriter, error) { } btw.bucketCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) - btw.bucketCollector.LogLvl(log.LvlDebug) + btw.bucketCollector.LogLvl(log.LvlError) //btw.offsetCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) //btw.offsetCollector.LogLvl(log.LvlDebug) diff --git a/state/domain.go b/state/domain.go index cf584f728..cd2b432e6 100644 --- a/state/domain.go +++ b/state/domain.go @@ -36,6 +36,7 @@ import ( "github.com/ledgerwatch/log/v3" btree2 "github.com/tidwall/btree" atomic2 "go.uber.org/atomic" + "golang.org/x/sync/errgroup" "golang.org/x/sync/semaphore" "github.com/ledgerwatch/erigon-lib/kv/bitmapdb" @@ -797,6 +798,134 @@ func (c Collation) Close() { } } +type kvpair struct { + k, v []byte +} + +func (d *Domain) collator(valuesComp *compress.Compressor, pairs chan kvpair) (count int, err error) { + for kv := range pairs { + if err = valuesComp.AddUncompressedWord(kv.k); err != nil { + return count, fmt.Errorf("add %s values key [%x]: %w", d.filenameBase, kv.k, err) + } + count++ // Only counting keys, not values + if err = valuesComp.AddUncompressedWord(kv.v); err != nil { + return count, fmt.Errorf("add %s values val [%x]=>[%x]: %w", d.filenameBase, kv.k, kv.v, err) + } + } + return count, nil +} + +// collate gathers domain changes over the specified step, using read-only transaction, +// and returns compressors, elias fano, and bitmaps +// [txFrom; txTo) +func (d *Domain) collateStream(ctx context.Context, step, txFrom, txTo uint64, roTx kv.Tx, logEvery *time.Ticker) (Collation, error) { + started := time.Now() + defer func() { + d.stats.LastCollationTook = time.Since(started) + }() + + hCollation, err := d.History.collate(step, txFrom, txTo, roTx, logEvery) + if err != nil { + return Collation{}, err + } + + var valuesComp *compress.Compressor + closeComp := true + defer func() { + if closeComp { + if valuesComp != nil { + valuesComp.Close() + } + } + }() + + valuesPath := filepath.Join(d.dir, fmt.Sprintf("%s.%d-%d.kv", d.filenameBase, step, step+1)) + if valuesComp, err = compress.NewCompressor(context.Background(), "collate values", valuesPath, d.tmpdir, compress.MinPatternScore, 1, log.LvlTrace); err != nil { + return Collation{}, fmt.Errorf("create %s values compressor: %w", d.filenameBase, err) + } + + keysCursor, err := roTx.CursorDupSort(d.keysTable) + if err != nil { + return Collation{}, fmt.Errorf("create %s keys cursor: %w", d.filenameBase, err) + } + defer keysCursor.Close() + + var ( + k, v []byte + pos uint64 + valCount uint + pairs = make(chan kvpair, 4) + ) + + totalKeys, err := keysCursor.Count() + if err != nil { + return Collation{}, fmt.Errorf("failed to obtain keys count for domain %q", d.filenameBase) + } + + eg, ctx := errgroup.WithContext(ctx) + eg.Go(func() error { + count, err := d.collator(valuesComp, pairs) + if err != nil { + return err + } + valCount = uint(count) + return nil + }) + + for k, _, err = keysCursor.First(); err == nil && k != nil; k, _, err = keysCursor.NextNoDup() { + pos++ + + select { + case <-logEvery.C: + log.Info("[snapshots] collate domain", "name", d.filenameBase, + "range", fmt.Sprintf("%.2f-%.2f", float64(txFrom)/float64(d.aggregationStep), float64(txTo)/float64(d.aggregationStep)), + "progress", fmt.Sprintf("%.2f%%", 
float64(pos)/float64(totalKeys)*100)) + case <-ctx.Done(): + log.Warn("[snapshots] collate domain cancelled", "name", d.filenameBase, "err", ctx.Err()) + close(pairs) + + return Collation{}, err + default: + } + + if v, err = keysCursor.LastDup(); err != nil { + return Collation{}, fmt.Errorf("find last %s key for aggregation step k=[%x]: %w", d.filenameBase, k, err) + } + s := ^binary.BigEndian.Uint64(v) + if s == step { + keySuffix := make([]byte, len(k)+8) + copy(keySuffix, k) + copy(keySuffix[len(k):], v) + + v, err := roTx.GetOne(d.valsTable, keySuffix) + if err != nil { + return Collation{}, fmt.Errorf("find last %s value for aggregation step k=[%x]: %w", d.filenameBase, k, err) + } + + pairs <- kvpair{k: k, v: v} + } + } + close(pairs) + if err != nil { + return Collation{}, fmt.Errorf("iterate over %s keys cursor: %w", d.filenameBase, err) + } + + if err := eg.Wait(); err != nil { + return Collation{}, fmt.Errorf("collate over %s keys cursor: %w", d.filenameBase, err) + } + + closeComp = false + return Collation{ + valuesPath: valuesPath, + valuesComp: valuesComp, + valuesCount: int(valCount), + historyPath: hCollation.historyPath, + historyComp: hCollation.historyComp, + historyCount: hCollation.historyCount, + indexBitmaps: hCollation.indexBitmaps, + }, nil +} + // collate gathers domain changes over the specified step, using read-only transaction, // and returns compressors, elias fano, and bitmaps // [txFrom; txTo) diff --git a/state/merge.go b/state/merge.go index 41b0e738e..f1e473721 100644 --- a/state/merge.go +++ b/state/merge.go @@ -654,29 +654,9 @@ func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, histor heap.Pop(&cp) } } - //var skip bool - //if d.prefixLen > 0 { - // skip = r.valuesStartTxNum == 0 && len(lastVal) == 0 && len(lastKey) != d.prefixLen - //} else { - // For the rest of types, empty value means deletion + skip := r.valuesStartTxNum == 0 && len(lastVal) == 0 - //} if !skip { - //if keyBuf != nil && (d.prefixLen == 0 || len(keyBuf) != d.prefixLen || bytes.HasPrefix(lastKey, keyBuf)) { - // if err = comp.AddUncompressedWord(keyBuf); err != nil { - // return nil, nil, nil, err - // } - // keyCount++ // Only counting keys, not values - // if d.compressVals { - // if err = comp.AddWord(valBuf); err != nil { - // return nil, nil, nil, err - // } - // } else { - // if err = comp.AddUncompressedWord(valBuf); err != nil { - // return nil, nil, nil, err - // } - // } - //} keyBuf = append(keyBuf[:0], lastKey...) valBuf = append(valBuf[:0], lastVal...) 
} From de50431c02d6e7f2576c953c3dbb41491558aa99 Mon Sep 17 00:00:00 2001 From: awskii Date: Thu, 16 Feb 2023 13:24:11 +0000 Subject: [PATCH 14/54] btree small cleanup and try to speedup prune in domains --- state/btree_index.go | 94 ++++---------------------------------------- state/domain.go | 69 ++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 116 deletions(-) diff --git a/state/btree_index.go b/state/btree_index.go index 3a64fe9f3..d0d62755b 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -555,7 +555,9 @@ func (a *btAlloc) fillSearchMx() { a.nodes[i][j].key = common.Copy(kb) a.nodes[i][j].val = common.Copy(v) } - fmt.Printf("\n") + if a.trace { + fmt.Printf("\n") + } } } @@ -638,21 +640,19 @@ const BtreeLogPrefix = "btree" // salt parameters is used to randomise the hash function construction, to ensure that different Erigon instances (nodes) // are likely to use different hash function, to collision attacks are unlikely to slow down any meaningful number of nodes at the same time func NewBtIndexWriter(args BtIndexWriterArgs) (*BtIndexWriter, error) { - btw := &BtIndexWriter{} + btw := &BtIndexWriter{lvl: log.LvlDebug} btw.tmpDir = args.TmpDir btw.indexFile = args.IndexFile + _, fname := filepath.Split(btw.indexFile) btw.indexFileName = fname - //btw.baseDataID = args.BaseDataID btw.etlBufLimit = args.EtlBufLimit if btw.etlBufLimit == 0 { btw.etlBufLimit = etl.BufferOptimalSize } btw.bucketCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) - btw.bucketCollector.LogLvl(log.LvlError) - //btw.offsetCollector = etl.NewCollector(BtreeLogPrefix+" "+fname, btw.tmpDir, etl.NewSortableBuffer(btw.etlBufLimit)) - //btw.offsetCollector.LogLvl(log.LvlDebug) + btw.bucketCollector.LogLvl(log.LvlDebug) btw.maxOffset = 0 return btw, nil @@ -679,58 +679,6 @@ func (btw *BtIndexWriter) loadFuncBucket(k, v []byte, _ etl.CurrentTableReader, return nil } -// -//func (rs *BtIndexWriter) drainBatch() error { -// // Extend rs.bucketSizeAcc to accomodate current bucket index + 1 -// //for len(rs.bucketSizeAcc) <= int(rs.currentBucketIdx)+1 { -// // rs.bucketSizeAcc = append(rs.bucketSizeAcc, rs.bucketSizeAcc[len(rs.bucketSizeAcc)-1]) -// //} -// //rs.bucketSizeAcc[int(rs.currentBucketIdx)+1] += uint64(len(rs.currentBucket)) -// //// Sets of size 0 and 1 are not further processed, just write them to index -// //if len(rs.currentBucket) > 1 { -// // for i, key := range rs.currentBucket[1:] { -// // if key == rs.currentBucket[i] { -// // rs.collision = true -// // return fmt.Errorf("%w: %x", ErrCollision, key) -// // } -// // } -// // bitPos := rs.gr.bitCount -// // if rs.buffer == nil { -// // rs.buffer = make([]uint64, len(rs.currentBucket)) -// // rs.offsetBuffer = make([]uint64, len(rs.currentBucketOffs)) -// // } else { -// // for len(rs.buffer) < len(rs.currentBucket) { -// // rs.buffer = append(rs.buffer, 0) -// // rs.offsetBuffer = append(rs.offsetBuffer, 0) -// // } -// // } -// // unary, err := rs.recsplit(0 /* level */, rs.currentBucket, rs.currentBucketOffs, nil /* unary */) -// // if err != nil { -// // return err -// // } -// // rs.gr.appendUnaryAll(unary) -// // if rs.trace { -// // fmt.Printf("recsplitBucket(%d, %d, bitsize = %d)\n", rs.currentBucketIdx, len(rs.currentBucket), rs.gr.bitCount-bitPos) -// // } -// //} else { -// var j int -// for _, offset := range rs.vals { -// binary.BigEndian.PutUint64(rs.numBuf[:], offset) -// rs.indexW.Write(rs.keys[j]) -// if _, err := 
rs.indexW.Write(rs.numBuf[8-rs.bytesPerRec:]); err != nil { -// return err -// } -// } -// //} -// //// Extend rs.bucketPosAcc to accomodate current bucket index + 1 -// //for len(rs.bucketPosAcc) <= int(rs.currentBucketIdx)+1 { -// // rs.bucketPosAcc = append(rs.bucketPosAcc, rs.bucketPosAcc[len(rs.bucketPosAcc)-1]) -// //} -// //rs.bucketPosAcc[int(rs.currentBucketIdx)+1] = uint64(rs.gr.Bits()) -// rs.keys = rs.keys[:0] -// rs.vals = rs.vals[:0] -// return nil -//} // Build has to be called after all the keys have been added, and it initiates the process // of building the perfect hash function and writing index into a file @@ -751,11 +699,6 @@ func (btw *BtIndexWriter) Build() error { defer btw.indexF.Close() btw.indexW = bufio.NewWriterSize(btw.indexF, etl.BufIOSize) defer btw.indexW.Flush() - // Write minimal app-specific dataID in this index file - //binary.BigEndian.PutUint64(btw.numBuf[:], btw.baseDataID) - //if _, err = btw.indexW.Write(btw.numBuf[:]); err != nil { - // return fmt.Errorf("write baseDataID: %w", err) - //} // Write number of keys binary.BigEndian.PutUint64(btw.numBuf[:], btw.keyCount) @@ -774,15 +717,6 @@ func (btw *BtIndexWriter) Build() error { return err } - //if ASSERT { - // btw.indexW.Flush() - // btw.indexF.Seek(0, 0) - // b, _ := io.ReadAll(btw.indexF) - // if len(b) != 9+int(btw.keysAdded)*btw.bytesPerRec { - // panic(fmt.Errorf("expected: %d, got: %d; btw.keysAdded=%d, btw.bytesPerRec=%d, %s", 9+int(btw.keysAdded)*btw.bytesPerRec, len(b), btw.keysAdded, btw.bytesPerRec, btw.indexFile)) - // } - //} - log.Log(btw.lvl, "[index] write", "file", btw.indexFileName) btw.built = true @@ -805,10 +739,6 @@ func (btw *BtIndexWriter) Close() { //} } -// func (btw *BtIndexWriter) Add(key, value []byte) error { - -// } - func (btw *BtIndexWriter) AddKey(key []byte, offset uint64) error { if btw.built { return fmt.Errorf("cannot add keys after perfect hash function had been built") @@ -892,7 +822,7 @@ func BuildBtreeIndexWithDecompressor(indexPath string, kv *compress.Decompressor emptys++ } } - fmt.Printf("emptys %d %#+v\n", emptys, ks) + //fmt.Printf("emptys %d %#+v\n", emptys, ks) if err := iw.Build(); err != nil { return err @@ -974,12 +904,6 @@ func OpenBtreeIndexWithDecompressor(indexPath string, M uint64, kv *compress.Dec idx.bytesPerRec = int(idx.data[pos]) pos += 1 - // idx.baseDataID = binary.BigEndian.Uint64(idx.data[pos:8]) - // offset := int(idx.keyCount) * idx.bytesPerRec //+ (idx.keySize * int(idx.keyCount)) - // if offset < 0 { - // return nil, fmt.Errorf("offset is: %d which is below zero, the file: %s is broken", offset, indexPath) - // } - //p := (*[]byte)(unsafe.Pointer(&idx.data[pos])) //l := int(idx.keyCount)*idx.bytesPerRec + (16 * int(idx.keyCount)) @@ -1019,7 +943,6 @@ func OpenBtreeIndex(indexPath, dataPath string, M uint64) (*BtIndex, error) { // Read number of keys and bytes per record pos := 8 idx.keyCount = binary.BigEndian.Uint64(idx.data[:pos]) - //idx.baseDataID = binary.BigEndian.Uint64(idx.data[pos:8]) idx.bytesPerRec = int(idx.data[pos]) pos += 1 @@ -1080,9 +1003,6 @@ func (b *BtIndex) Size() int64 { return b.size } func (b *BtIndex) ModTime() time.Time { return b.modTime } -// Deprecated -func (b *BtIndex) BaseDataID() uint64 { return b.baseDataID } - func (b *BtIndex) FilePath() string { return b.filePath } func (b *BtIndex) FileName() string { return path.Base(b.filePath) } diff --git a/state/domain.go b/state/domain.go index cd2b432e6..b06b95de3 100644 --- a/state/domain.go +++ b/state/domain.go @@ -1264,33 +1264,41 @@ func (d 
*Domain) prune(ctx context.Context, step uint64, txFrom, txTo, limit uin // It is important to clean up tables in a specific order // First keysTable, because it is the first one access in the `get` function, i.e. if the record is deleted from there, other tables will not be accessed - for k, v, err = keysCursor.First(); err == nil && k != nil; k, v, err = keysCursor.Next() { - select { - case <-logEvery.C: - log.Info("[snapshots] prune domain", "name", d.filenameBase, "stage", "prune keys", "range", fmt.Sprintf("%.2f-%.2f", float64(txFrom)/float64(d.aggregationStep), float64(txTo)/float64(d.aggregationStep))) - case <-ctx.Done(): - log.Warn("[snapshots] prune domain cancelled", "name", d.filenameBase, "err", ctx.Err()) - return err - default: - if bytes.Equal(stepBytes, v) { - if maxS := keyMaxSteps[string(k)]; maxS <= step { - continue - } - if err = keysCursor.DeleteCurrent(); err != nil { - return fmt.Errorf("clean up %s for [%x]=>[%x]: %w", d.filenameBase, k, v, err) - } - } - } - } - if err != nil { - return fmt.Errorf("iterate of %s keys: %w", d.filenameBase, err) - } + //for k, v := range keyMaxSteps { + // + //} + //for k, v, err = keysCursor.First(); err == nil && k != nil; k, v, err = keysCursor.Next() { + // select { + // case <-logEvery.C: + // log.Info("[snapshots] prune domain", "name", d.filenameBase, "stage", "prune keys", "range", fmt.Sprintf("%.2f-%.2f", float64(txFrom)/float64(d.aggregationStep), float64(txTo)/float64(d.aggregationStep))) + // case <-ctx.Done(): + // log.Warn("[snapshots] prune domain cancelled", "name", d.filenameBase, "err", ctx.Err()) + // return err + // default: + // if bytes.Equal(stepBytes, v) { + // if maxS := keyMaxSteps[string(k)]; maxS <= step { + // continue + // } + // if err = keysCursor.DeleteCurrent(); err != nil { + // return fmt.Errorf("clean up %s for [%x]=>[%x]: %w", d.filenameBase, k, v, err) + // } + // } + // } + //} + //if err != nil { + // return fmt.Errorf("iterate of %s keys: %w", d.filenameBase, err) + //} var valsCursor kv.RwCursor if valsCursor, err = d.tx.RwCursor(d.valsTable); err != nil { return fmt.Errorf("%s vals cursor: %w", d.filenameBase, err) } defer valsCursor.Close() - for k, _, err = valsCursor.First(); err == nil && k != nil; k, _, err = valsCursor.Next() { + //for k, _, err = valsCursor.First(); err == nil && k != nil; k, _, err = valsCursor.Next() { + for k, s := range keyMaxSteps { + if s <= step { + continue + } + select { case <-logEvery.C: log.Info("[snapshots] prune domain", "name", d.filenameBase, "stage", "prune values", "range", fmt.Sprintf("%.2f-%.2f", float64(txFrom)/float64(d.aggregationStep), float64(txTo)/float64(d.aggregationStep))) @@ -1298,14 +1306,17 @@ func (d *Domain) prune(ctx context.Context, step uint64, txFrom, txTo, limit uin log.Warn("[snapshots] prune domain cancelled", "name", d.filenameBase, "err", ctx.Err()) return err default: - if bytes.Equal(stepBytes, k[len(k)-8:]) { - if maxS := keyMaxSteps[string(k[len(k)-8:])]; maxS <= step { - continue - } - if err = valsCursor.DeleteCurrent(); err != nil { - return fmt.Errorf("clean up %s for [%x]: %w", d.filenameBase, k, err) - } + //if bytes.Equal(stepBytes, k[len(k)-8:]) { + //if maxS := keyMaxSteps[string(k[len(k)-8:])]; maxS <= step { + // continue + //} + if err = keysCursor.DeleteExact([]byte(k), stepBytes); err != nil { + return fmt.Errorf("clean up key %s for [%x]: %w", d.filenameBase, k, err) + } + if err = valsCursor.Delete([]byte(k)); err != nil { + return fmt.Errorf("clean up %s for [%x]: %w", d.filenameBase, k, err) } + //} 
} } if err != nil { From 2a200eb686f3aad34d1110087aa8d1bed6e6beb4 Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 17 Feb 2023 10:34:14 +0000 Subject: [PATCH 15/54] bit more info about aggregations/merges --- compress/decompress_test.go | 51 ++++++++++++++++++++++++ state/aggregator.go | 77 ++++++++++++++++++++++++------------- 2 files changed, 101 insertions(+), 27 deletions(-) diff --git a/compress/decompress_test.go b/compress/decompress_test.go index 9d903abd8..209ad9992 100644 --- a/compress/decompress_test.go +++ b/compress/decompress_test.go @@ -98,6 +98,57 @@ func TestDecompressMatchOK(t *testing.T) { } } +func prepareStupidDict(t *testing.T, size int) *Decompressor { + t.Helper() + tmpDir := t.TempDir() + file := filepath.Join(tmpDir, "compressed2") + t.Name() + c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug) + if err != nil { + t.Fatal(err) + } + defer c.Close() + for i := 0; i < size; i++ { + if err = c.AddWord([]byte(fmt.Sprintf("word-%d", i))); err != nil { + t.Fatal(err) + } + } + if err = c.Compress(); err != nil { + t.Fatal(err) + } + var d *Decompressor + if d, err = NewDecompressor(file); err != nil { + t.Fatal(err) + } + return d +} + +func TestDecompressMatchOKCondensed(t *testing.T) { + condensePatternTableBitThreshold = 4 + d := prepareStupidDict(t, 10000) + defer func() { condensePatternTableBitThreshold = 9 }() + defer d.Close() + + g := d.MakeGetter() + i := 0 + for g.HasNext() { + if i%2 != 0 { + expected := fmt.Sprintf("word-%d", i) + ok, _ := g.Match([]byte(expected)) + if !ok { + t.Errorf("expexted match with %s", expected) + } + } else { + word, _ := g.Next(nil) + expected := fmt.Sprintf("word-%d", i) + if string(word) != expected { + t.Errorf("expected %s, got (hex) %s", expected, word) + } + } + i++ + } +} + func TestDecompressMatchNotOK(t *testing.T) { d := prepareLoremDict(t) defer d.Close() diff --git a/state/aggregator.go b/state/aggregator.go index 1d1a1c9a9..ee980408b 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -285,6 +285,8 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { txFrom = step * a.aggregationStep txTo = (step + 1) * a.aggregationStep workers = 1 + + stepStartedAt = time.Now() ) defer logEvery.Stop() @@ -368,10 +370,46 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { return fmt.Errorf("domain collate-build failed: %w", err) } - ac := a.MakeContext() - defer ac.Close() + //ac := a.MakeContext() + //defer ac.Close() + + var clo, chi, plo, phi, blo, bhi time.Duration + clo, plo, blo = time.Hour*99, time.Hour*99, time.Hour*99 + for _, s := range []DomainStats{a.accounts.stats, a.code.stats, a.storage.stats} { + c := s.LastCollationTook + p := s.LastPruneTook + b := s.LastFileBuildingTook + + if c < clo { + clo = c + } + if c > chi { + chi = c + } + if p < plo { + plo = p + } + if p > phi { + phi = p + } + if b < blo { + blo = b + } + if b > bhi { + bhi = b + } + } + log.Info("[stat] finished aggregation, ready for mergeUpTo", + "range", fmt.Sprintf("%.2fM-%.2fM", float64(txFrom)/10e5, float64(txTo)/10e5), + "step_took", time.Since(stepStartedAt), + "collate_min", clo, "collate_max", chi, + "prune_min", plo, "prune_max", phi, + "files_build_min", blo, "files_build_max", bhi) + + mergeStartedAt := time.Now() maxEndTxNum := a.EndTxNumMinimax() + var upmerges int for { somethingMerged, err := a.mergeLoopStep(ctx, maxEndTxNum, 1) if err != nil { @@ -380,7 +418,12 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { if 
!somethingMerged { break } + upmerges++ } + log.Info("[stat] aggregation merged", + "upto_tx", maxEndTxNum, + "merge_took", time.Since(mergeStartedAt), + "merges_count", upmerges) return nil } @@ -417,25 +460,10 @@ func (a *Aggregator) mergeLoopStep(ctx context.Context, maxEndTxNum uint64, work a.cleanAfterFreeze(in) closeAll = false - var clo, chi, plo, phi, blo, bhi time.Duration - clo, plo, blo = time.Hour*99, time.Hour*99, time.Hour*99 + var blo, bhi time.Duration + blo = time.Hour * 99 for _, s := range []DomainStats{a.accounts.stats, a.code.stats, a.storage.stats} { - c := s.LastCollationTook - p := s.LastPruneTook b := s.LastFileBuildingTook - - if c < clo { - clo = c - } - if c > chi { - chi = c - } - if p < plo { - plo = p - } - if p > phi { - phi = p - } if b < blo { blo = b } @@ -444,14 +472,9 @@ func (a *Aggregator) mergeLoopStep(ctx context.Context, maxEndTxNum uint64, work } } - log.Info("[stat] finished merge details", - // "step", step, - // "range", fmt.Sprintf("%.2fM-%.2fM", float64(txFrom)/10e5, float64(txTo)/10e5), - "upto_tx", maxEndTxNum, "merge_took", time.Since(mergeStartedAt), - // "step_took", time.Since(stepStartedAt), - "collate_min", clo, "collate_max", chi, - "prune_min", plo, "prune_max", phi, - "files_build_min", blo, "files_build_max", bhi) + log.Info("[stat] finished merge step", + "upto_tx", maxEndTxNum, "merge_step_took", time.Since(mergeStartedAt), + "merge_min", blo, "merge_max", bhi) return true, nil } From e1f74341e2e0112cf750279311f7430805ddff60 Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 17 Feb 2023 10:41:51 +0000 Subject: [PATCH 16/54] fix --- state/aggregator.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/state/aggregator.go b/state/aggregator.go index ee980408b..03e251e41 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -400,9 +400,10 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { } } + stepTook := time.Since(stepStartedAt) log.Info("[stat] finished aggregation, ready for mergeUpTo", "range", fmt.Sprintf("%.2fM-%.2fM", float64(txFrom)/10e5, float64(txTo)/10e5), - "step_took", time.Since(stepStartedAt), + "step_took", stepTook, "collate_min", clo, "collate_max", chi, "prune_min", plo, "prune_max", phi, "files_build_min", blo, "files_build_max", bhi) @@ -422,6 +423,8 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { } log.Info("[stat] aggregation merged", "upto_tx", maxEndTxNum, + "aggregation_took", time.Since(stepStartedAt), + "step_took", stepTook, "merge_took", time.Since(mergeStartedAt), "merges_count", upmerges) return nil From 615602fb7b370298609af0191d0fc0bd47ebf79b Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 17 Feb 2023 12:57:05 +0000 Subject: [PATCH 17/54] findMergeRange beautify print, context reusing fix --- state/aggregator.go | 16 ++++++++++++---- state/merge.go | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 03e251e41..aa328ec42 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -440,10 +440,15 @@ func (a *Aggregator) mergeLoopStep(ctx context.Context, maxEndTxNum uint64, work return false, nil } - ac := a.MakeContext() // this need, to ensure we do all operations on files in "transaction-style", maybe we will ensure it on type-level in future - defer ac.Close() + //ac := a.MakeContext() // this need, to ensure we do all operations on files in "transaction-style", maybe we will ensure it on type-level in future + //defer ac.Close() 
+ + defer func() { + a.defaultCtx.Close() + a.defaultCtx = a.MakeContext() + }() - outs := a.staticFilesInRange(r, ac) + outs := a.staticFilesInRange(r, a.defaultCtx) defer func() { if closeAll { outs.Close() @@ -501,6 +506,10 @@ type Ranges struct { //tracesTo bool } +func (r Ranges) String() string { + return fmt.Sprintf("accounts=%s, storage=%s, code=%s, commitment=%s", r.accounts.String(), r.storage.String(), r.code.String(), r.commitment.String()) +} + func (r Ranges) any() bool { return r.accounts.any() || r.storage.any() || r.code.any() || r.commitment.any() } @@ -1038,7 +1047,6 @@ func (a *Aggregator) FinishWrites() { // Flush - must be called before Collate, if you did some writes func (a *Aggregator) Flush(ctx context.Context) error { - // TODO: Add support of commitment! flushers := []flusher{ a.accounts.Rotate(), a.storage.Rotate(), diff --git a/state/merge.go b/state/merge.go index f1e473721..51c89d5e7 100644 --- a/state/merge.go +++ b/state/merge.go @@ -106,6 +106,26 @@ type DomainRanges struct { index bool } +func (r DomainRanges) String() string { + var b strings.Builder + if r.values { + b.WriteString(fmt.Sprintf("Values: [%d, %d)", r.valuesStartTxNum, r.valuesEndTxNum)) + } + if r.history { + if b.Len() > 0 { + b.WriteString(", ") + } + b.WriteString(fmt.Sprintf("History: [%d, %d)", r.historyStartTxNum, r.historyEndTxNum)) + } + if r.index { + if b.Len() > 0 { + b.WriteString(", ") + } + b.WriteString(fmt.Sprintf("Index: [%d, %d)", r.indexStartTxNum, r.indexEndTxNum)) + } + return b.String() +} + func (r DomainRanges) any() bool { return r.values || r.history || r.index } From 49c73ee81edfaa10f4ed6844a1008e4b54443e07 Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 17 Feb 2023 13:15:48 +0000 Subject: [PATCH 18/54] reduce max height of snapshot files --- state/aggregator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/state/aggregator.go b/state/aggregator.go index aa328ec42..63feeab66 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -39,7 +39,7 @@ import ( // StepsInBiggestFile - files of this size are completely frozen/immutable. // files of smaller size are also immutable, but can be removed after merge to bigger files. 
-const StepsInBiggestFile = 16 +const StepsInBiggestFile = 4 // Reconstruction of the aggregator in another package, `aggregator` From 5ca0addc6c687cf859abe1e035d43862002bae85 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 20 Feb 2023 15:36:08 +0000 Subject: [PATCH 19/54] minor cleanup --- state/btree_index.go | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/state/btree_index.go b/state/btree_index.go index d0d62755b..cd7d00108 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -261,9 +261,6 @@ func (a *btAlloc) traverseTrick() { func (a *btAlloc) traverseDfs() { for l := 0; l < len(a.sons)-1; l++ { - //if len(a.sons[l]) < 2 { - // panic("invalid btree allocation markup") - //} a.cursors[l] = markupCursor{uint64(l), 1, 0, 0} a.nodes[l] = make([]node, 0) } @@ -1062,28 +1059,3 @@ func (b *BtIndex) OrdinalLookup(i uint64) *Cursor { key: k, value: v, d: i, ix: b.alloc, } } - -func (b *BtIndex) ExtractOffsets() map[uint64]uint64 { - //TODO implement me - panic("implement me") -} - -func (b *BtIndex) DisableReadAhead() { - //TODO implement me - panic("implement me") -} - -func (b *BtIndex) EnableReadAhead() *interface{} { - //TODO implement me - panic("implement me") -} - -func (b *BtIndex) EnableMadvNormal() *interface{} { - //TODO implement me - panic("implement me") -} - -func (b *BtIndex) EnableWillNeed() *interface{} { - //TODO implement me - panic("implement me") -} From c7fa88e1705a45264d0425db153dab8126a627f7 Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 22 Feb 2023 12:01:56 +0000 Subject: [PATCH 20/54] cleanup --- state/aggregator.go | 61 ++++++----------------------- state/merge.go | 93 +-------------------------------------------- 2 files changed, 13 insertions(+), 141 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 63feeab66..9534dbb33 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -39,9 +39,7 @@ import ( // StepsInBiggestFile - files of this size are completely frozen/immutable. // files of smaller size are also immutable, but can be removed after merge to bigger files. 
-const StepsInBiggestFile = 4 - -// Reconstruction of the aggregator in another package, `aggregator` +const StepsInBiggestFile = 32 type Aggregator struct { aggregationStep uint64 @@ -219,9 +217,6 @@ func (a *Aggregator) SetTxNum(txNum uint64) { a.tracesTo.SetTxNum(txNum) } -// todo useless -func (a *Aggregator) SetBlockNum(bn uint64) { a.blockNum = bn } - func (a *Aggregator) SetWorkers(i int) { a.accounts.compressWorkers = i a.storage.compressWorkers = i @@ -326,6 +321,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { } } + // indices are built concurrently for _, d := range []*InvertedIndex{a.logTopics, a.logAddrs, a.tracesFrom, a.tracesTo} { wg.Add(1) @@ -370,8 +366,10 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { return fmt.Errorf("domain collate-build failed: %w", err) } - //ac := a.MakeContext() - //defer ac.Close() + defer func() { // this need, to ensure we do all operations on files in "transaction-style", maybe we will ensure it on type-level in future + a.defaultCtx.Close() + a.defaultCtx = a.MakeContext() + }() var clo, chi, plo, phi, blo, bhi time.Duration clo, plo, blo = time.Hour*99, time.Hour*99, time.Hour*99 @@ -440,14 +438,6 @@ func (a *Aggregator) mergeLoopStep(ctx context.Context, maxEndTxNum uint64, work return false, nil } - //ac := a.MakeContext() // this need, to ensure we do all operations on files in "transaction-style", maybe we will ensure it on type-level in future - //defer ac.Close() - - defer func() { - a.defaultCtx.Close() - a.defaultCtx = a.MakeContext() - }() - outs := a.staticFilesInRange(r, a.defaultCtx) defer func() { if closeAll { @@ -492,18 +482,6 @@ type Ranges struct { storage DomainRanges code DomainRanges commitment DomainRanges - //logTopicsEndTxNum uint64 - //logAddrsEndTxNum uint64 - //logTopicsStartTxNum uint64 - //logAddrsStartTxNum uint64 - //tracesFromStartTxNum uint64 - //tracesFromEndTxNum uint64 - //tracesToStartTxNum uint64 - //tracesToEndTxNum uint64 - //logAddrs bool - //logTopics bool - //tracesFrom bool - //tracesTo bool } func (r Ranges) String() string { @@ -537,18 +515,10 @@ type SelectedStaticFiles struct { commitment []*filesItem commitmentIdx []*filesItem commitmentHist []*filesItem - //tracesTo []*filesItem - //tracesFrom []*filesItem - //logTopics []*filesItem - //logAddrs []*filesItem - codeI int - storageI int - accountsI int - commitmentI int - //logAddrsI int - //tracesFromI int - //logTopicsI int - //tracesToI int + codeI int + storageI int + accountsI int + commitmentI int } func (sf SelectedStaticFiles) Close() { @@ -557,7 +527,6 @@ func (sf SelectedStaticFiles) Close() { sf.storage, sf.storageIdx, sf.storageHist, sf.code, sf.codeIdx, sf.codeHist, sf.commitment, sf.commitmentIdx, sf.commitmentHist, - //sf.logAddrs, sf.logTopics, sf.tracesFrom, sf.tracesTo, } { for _, item := range group { if item != nil { @@ -601,10 +570,6 @@ type MergedFiles struct { codeIdx, codeHist *filesItem commitment *filesItem commitmentIdx, commitmentHist *filesItem - //logAddrs *filesItem - //logTopics *filesItem - //tracesFrom *filesItem - //tracesTo *filesItem } func (mf MergedFiles) Close() { @@ -720,16 +685,14 @@ func (a *Aggregator) integrateMergedFiles(outs SelectedStaticFiles, in MergedFil a.code.integrateMergedFiles(outs.code, outs.codeIdx, outs.codeHist, in.code, in.codeIdx, in.codeHist) a.commitment.integrateMergedFiles(outs.commitment, outs.commitmentIdx, outs.commitmentHist, in.commitment, in.commitmentIdx, in.commitmentHist) } + func (a *Aggregator) cleanAfterFreeze(in 
MergedFiles) { a.accounts.cleanAfterFreeze(in.accountsHist) a.storage.cleanAfterFreeze(in.storageHist) a.code.cleanAfterFreeze(in.codeHist) a.commitment.cleanAfterFreeze(in.commitment) - //a.logAddrs.cleanAfterFreeze(in.logAddrs) - //a.logTopics.cleanAfterFreeze(in.logTopics) - //a.tracesFrom.cleanAfterFreeze(in.tracesFrom) - //a.tracesTo.cleanAfterFreeze(in.tracesTo) } + func (ac *AggregatorContext) ReadAccountData(addr []byte, roTx kv.Tx) ([]byte, error) { return ac.accounts.Get(addr, nil, roTx) } diff --git a/state/merge.go b/state/merge.go index 51c89d5e7..6d85d6d54 100644 --- a/state/merge.go +++ b/state/merge.go @@ -24,7 +24,6 @@ import ( "fmt" "path/filepath" "strings" - "time" "github.com/ledgerwatch/log/v3" @@ -164,93 +163,6 @@ func (d *Domain) findMergeRange(maxEndTxNum, maxSpan uint64) DomainRanges { return r } -// nolint -type mergedDomainFiles struct { - values *filesItem - index *filesItem - history *filesItem -} - -// nolint -func (m *mergedDomainFiles) Close() { - for _, item := range []*filesItem{ - m.values, m.index, m.history, - } { - if item != nil { - if item.decompressor != nil { - item.decompressor.Close() - } - if item.decompressor != nil { - item.index.Close() - } - } - } -} - -// nolint -type staticFilesInRange struct { - valuesFiles []*filesItem - indexFiles []*filesItem - historyFiles []*filesItem - startJ int -} - -// nolint -func (s *staticFilesInRange) Close() { - for _, group := range [][]*filesItem{ - s.valuesFiles, s.indexFiles, s.historyFiles, - } { - for _, item := range group { - if item != nil { - if item.decompressor != nil { - item.decompressor.Close() - } - if item.index != nil { - item.index.Close() - } - if item.bindex != nil { - item.bindex.Close() - } - } - } - } -} - -// nolint -func (d *Domain) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int, dctx *DomainContext) (err error) { - closeAll := true - for rng := d.findMergeRange(maxSpan, maxTxNum); rng.any(); rng = d.findMergeRange(maxTxNum, maxSpan) { - var sfr staticFilesInRange - sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, sfr.startJ = d.staticFilesInRange(rng, dctx) - defer func() { - if closeAll { - sfr.Close() - } - }() - - var mf mergedDomainFiles - if mf.values, mf.index, mf.history, err = d.mergeFiles(ctx, sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, rng, workers); err != nil { - return err - } - defer func() { - if closeAll { - mf.Close() - } - }() - - defer func(t time.Time) { log.Info("[snapshots] merge", "took", time.Since(t)) }(time.Now()) - d.integrateMergedFiles(sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles, mf.values, mf.index, mf.history) - - // if err := d.deleteFiles(sfr.valuesFiles, sfr.indexFiles, sfr.historyFiles); err != nil { - // return err - // } - - log.Info(fmt.Sprintf("domain files mergedRange[%d, %d) name=%s span=%d \n", rng.valuesStartTxNum, rng.valuesEndTxNum, d.filenameBase, maxSpan)) - } - closeAll = false - return nil -} - // 0-1,1-2,2-3,3-4: allow merge 0-1 // 0-2,2-3,3-4: allow merge 0-4 // 0-2,2-4: allow merge 0-4 @@ -288,7 +200,6 @@ func (ii *InvertedIndex) findMergeRange(maxEndTxNum, maxSpan uint64) (bool, uint return minFound, startTxNum, endTxNum } -// nolint func (ii *InvertedIndex) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int, ictx *InvertedIndexContext) (err error) { closeAll := true for updated, startTx, endTx := ii.findMergeRange(maxSpan, maxTxNum); updated; updated, startTx, endTx = ii.findMergeRange(maxTxNum, maxSpan) { @@ -314,9 +225,7 @@ func (ii *InvertedIndex) 
mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan }() ii.integrateMergedFiles(staticFiles, mergedIndex) - // if err := ii.deleteFiles(staticFiles); err != nil { - // return err - // } + ii.cleanAfterFreeze(mergedIndex) } closeAll = false return nil From cba2b3a195216847d5f8aeeca82f156947e857e0 Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 22 Feb 2023 12:17:27 +0000 Subject: [PATCH 21/54] removed old aggregator code, made lint --- aggregator/aggregator.go | 3302 --------------------------------- aggregator/aggregator_test.go | 314 ---- aggregator/history.go | 354 ---- state/aggregator_test.go | 30 +- state/btree_index.go | 45 +- state/domain.go | 24 +- state/domain_committed.go | 2 + 7 files changed, 42 insertions(+), 4029 deletions(-) delete mode 100644 aggregator/aggregator.go delete mode 100644 aggregator/aggregator_test.go delete mode 100644 aggregator/history.go diff --git a/aggregator/aggregator.go b/aggregator/aggregator.go deleted file mode 100644 index 8849a451a..000000000 --- a/aggregator/aggregator.go +++ /dev/null @@ -1,3302 +0,0 @@ -/* - Copyright 2022 Erigon contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package aggregator - -import ( - "bufio" - "bytes" - "container/heap" - "context" - "encoding/binary" - "errors" - "fmt" - "hash" - "io" - "io/fs" - "math" - "os" - "path" - "path/filepath" - "regexp" - "strconv" - "strings" - "sync" - "sync/atomic" - "time" - - "github.com/RoaringBitmap/roaring/roaring64" - "github.com/google/btree" - "github.com/ledgerwatch/log/v3" - "github.com/spaolacci/murmur3" - "golang.org/x/crypto/sha3" - "golang.org/x/exp/slices" - - "github.com/ledgerwatch/erigon-lib/common" - "github.com/ledgerwatch/erigon-lib/etl" - - "github.com/ledgerwatch/erigon-lib/commitment" - "github.com/ledgerwatch/erigon-lib/common/length" - "github.com/ledgerwatch/erigon-lib/compress" - "github.com/ledgerwatch/erigon-lib/kv" - "github.com/ledgerwatch/erigon-lib/recsplit" - "github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32" -) - -// Aggregator of multiple state files to support state reader and state writer -// The convension for the file names are as follows -// State is composed of three types of files: -// 1. Accounts. keys are addresses (20 bytes), values are encoding of accounts -// 2. Contract storage. Keys are concatenation of addresses (20 bytes) and storage locations (32 bytes), values have their leading zeroes removed -// 3. Contract codes. Keys are addresses (20 bytes), values are bycodes -// Within each type, any file can cover an interval of block numbers, for example, `accounts.1-16` represents changes in accounts -// that were effected by the blocks from 1 to 16, inclusively. The second component of the interval will be called "end block" for the file. 
-// Finally, for each type and interval, there are two files - one with the compressed data (extension `dat`), -// and another with the index (extension `idx`) consisting of the minimal perfect hash table mapping keys to the offsets of corresponding keys -// in the data file -// Aggregator consists (apart from the file it is aggregating) of the 4 parts: -// 1. Persistent table of expiration time for each of the files. Key - name of the file, value - timestamp, at which the file can be removed -// 2. Transient (in-memory) mapping the "end block" of each file to the objects required for accessing the file (compress.Decompressor and resplit.Index) -// 3. Persistent tables (one for accounts, one for contract storage, and one for contract code) summarising all the 1-block state diff files -// that were not yet merged together to form larger files. In these tables, keys are the same as keys in the state diff files, but values are also -// augemented by the number of state diff files this key is present. This number gets decremented every time when a 1-block state diff files is removed -// from the summary table (due to being merged). And when this number gets to 0, the record is deleted from the summary table. -// This number is encoded into first 4 bytes of the value -// 4. Aggregating persistent hash table. Maps state keys to the block numbers for the use in the part 2 (which is not necessarily the block number where -// the item last changed, but it is guaranteed to find correct element in the Transient mapping of part 2 - -type FileType int - -const ( - Account FileType = iota - Storage - Code - Commitment - AccountHistory - StorageHistory - CodeHistory - AccountBitmap - StorageBitmap - CodeBitmap - NumberOfTypes -) - -const ( - FirstType = Account - NumberOfAccountStorageTypes = Code - NumberOfStateTypes = AccountHistory -) - -func (ft FileType) String() string { - switch ft { - case Account: - return "account" - case Storage: - return "storage" - case Code: - return "code" - case Commitment: - return "commitment" - case AccountHistory: - return "ahistory" - case CodeHistory: - return "chistory" - case StorageHistory: - return "shistory" - case AccountBitmap: - return "abitmap" - case CodeBitmap: - return "cbitmap" - case StorageBitmap: - return "sbitmap" - default: - panic(fmt.Sprintf("unknown file type: %d", ft)) - } -} - -func (ft FileType) Table() string { - switch ft { - case Account: - return kv.StateAccounts - case Storage: - return kv.StateStorage - case Code: - return kv.StateCode - case Commitment: - return kv.StateCommitment - default: - panic(fmt.Sprintf("unknown file type: %d", ft)) - } -} - -func ParseFileType(s string) (FileType, bool) { - switch s { - case "account": - return Account, true - case "storage": - return Storage, true - case "code": - return Code, true - case "commitment": - return Commitment, true - case "ahistory": - return AccountHistory, true - case "chistory": - return CodeHistory, true - case "shistory": - return StorageHistory, true - case "abitmap": - return AccountBitmap, true - case "cbitmap": - return CodeBitmap, true - case "sbitmap": - return StorageBitmap, true - default: - return NumberOfTypes, false - } -} - -type Aggregator struct { - files [NumberOfTypes]*btree.BTree - hph commitment.Trie //*commitment.HexPatriciaHashed - archHasher murmur3.Hash128 - keccak hash.Hash - historyChannel chan struct{} - mergeChannel chan struct{} - tracedKeys map[string]struct{} // Set of keys being traced during aggregations - changesBtree *btree.BTree // btree of 
ChangesItem - historyError chan error - mergeError chan error - aggChannel chan *AggregationTask - aggError chan error - diffDir string // Directory where the state diff files are stored - arches [NumberOfStateTypes][]uint32 // Over-arching hash tables containing the block number of last aggregation - historyWg sync.WaitGroup - aggWg sync.WaitGroup - mergeWg sync.WaitGroup - unwindLimit uint64 // How far the chain may unwind - aggregationStep uint64 // How many items (block, but later perhaps txs or changes) are required to form one state diff file - fileHits uint64 // Counter for state file hit ratio - fileMisses uint64 // Counter for state file hit ratio - fileLocks [NumberOfTypes]sync.RWMutex - commitments bool // Whether to calculate commitments - changesets bool // Whether to generate changesets (off by default) - trace bool // Turns on tracing for specific accounts and locations -} - -type ChangeFile struct { - r *bufio.Reader - rTx *bufio.Reader - w *bufio.Writer - fileTx *os.File - wTx *bufio.Writer - file *os.File - pathTx string - path string - dir string - namebase string - words []byte // Words pending for the next block record, in the same slice - wordOffsets []int // Offsets of words in the `words` slice - step uint64 - txNum uint64 // Currently read transaction number - txRemaining uint64 // Remaining number of bytes to read in the current transaction -} - -func (cf *ChangeFile) closeFile() error { - if len(cf.wordOffsets) > 0 { - return fmt.Errorf("closeFile without finish") - } - if cf.w != nil { - if err := cf.w.Flush(); err != nil { - return err - } - cf.w = nil - } - if cf.file != nil { - if err := cf.file.Close(); err != nil { - return err - } - cf.file = nil - } - if cf.wTx != nil { - if err := cf.wTx.Flush(); err != nil { - return err - } - cf.wTx = nil - } - if cf.fileTx != nil { - if err := cf.fileTx.Close(); err != nil { - return err - } - cf.fileTx = nil - } - return nil -} - -func (cf *ChangeFile) openFile(blockNum uint64, write bool) error { - if len(cf.wordOffsets) > 0 { - return fmt.Errorf("openFile without finish") - } - rem := blockNum % cf.step - startBlock := blockNum - rem - endBlock := startBlock + cf.step - 1 - if cf.w == nil { - cf.path = filepath.Join(cf.dir, fmt.Sprintf("%s.%d-%d.chg", cf.namebase, startBlock, endBlock)) - cf.pathTx = filepath.Join(cf.dir, fmt.Sprintf("%s.%d-%d.ctx", cf.namebase, startBlock, endBlock)) - var err error - if write { - if cf.file, err = os.OpenFile(cf.path, os.O_RDWR|os.O_CREATE, 0755); err != nil { - return err - } - if cf.fileTx, err = os.OpenFile(cf.pathTx, os.O_RDWR|os.O_CREATE, 0755); err != nil { - return err - } - if _, err = cf.file.Seek(0, 2 /* relative to the end of the file */); err != nil { - return err - } - if _, err = cf.fileTx.Seek(0, 2 /* relative to the end of the file */); err != nil { - return err - } - } else { - if cf.file, err = os.Open(cf.path); err != nil { - return err - } - if cf.fileTx, err = os.Open(cf.pathTx); err != nil { - return err - } - } - if write { - cf.w = bufio.NewWriter(cf.file) - cf.wTx = bufio.NewWriter(cf.fileTx) - } - cf.r = bufio.NewReader(cf.file) - cf.rTx = bufio.NewReader(cf.fileTx) - } - return nil -} - -func (cf *ChangeFile) rewind() error { - var err error - if _, err = cf.file.Seek(0, 0); err != nil { - return err - } - cf.r = bufio.NewReader(cf.file) - if _, err = cf.fileTx.Seek(0, 0); err != nil { - return err - } - cf.rTx = bufio.NewReader(cf.fileTx) - return nil -} - -func (cf *ChangeFile) add(word []byte) { - cf.words = append(cf.words, word...) 
- cf.wordOffsets = append(cf.wordOffsets, len(cf.words)) -} - -func (cf *ChangeFile) finish(txNum uint64) error { - var numBuf [10]byte - // Write out words - lastOffset := 0 - var size uint64 - for _, offset := range cf.wordOffsets { - word := cf.words[lastOffset:offset] - n := binary.PutUvarint(numBuf[:], uint64(len(word))) - if _, err := cf.w.Write(numBuf[:n]); err != nil { - return err - } - if len(word) > 0 { - if _, err := cf.w.Write(word); err != nil { - return err - } - } - size += uint64(n + len(word)) - lastOffset = offset - } - cf.words = cf.words[:0] - cf.wordOffsets = cf.wordOffsets[:0] - n := binary.PutUvarint(numBuf[:], txNum) - if _, err := cf.wTx.Write(numBuf[:n]); err != nil { - return err - } - n = binary.PutUvarint(numBuf[:], size) - if _, err := cf.wTx.Write(numBuf[:n]); err != nil { - return err - } - return nil -} - -// prevTx positions the reader to the beginning -// of the transaction -func (cf *ChangeFile) nextTx() (bool, error) { - var err error - if cf.txNum, err = binary.ReadUvarint(cf.rTx); err != nil { - if errors.Is(err, io.EOF) { - return false, nil - } - return false, err - } - if cf.txRemaining, err = binary.ReadUvarint(cf.rTx); err != nil { - return false, err - } - return true, nil -} - -func (cf *ChangeFile) nextWord(wordBuf []byte) ([]byte, bool, error) { - if cf.txRemaining == 0 { - return wordBuf, false, nil - } - ws, err := binary.ReadUvarint(cf.r) - if err != nil { - return wordBuf, false, fmt.Errorf("word size: %w", err) - } - var buf []byte - if total := len(wordBuf) + int(ws); cap(wordBuf) >= total { - buf = wordBuf[:total] // Reuse the space in wordBuf, is it has enough capacity - } else { - buf = make([]byte, total) - copy(buf, wordBuf) - } - if _, err = io.ReadFull(cf.r, buf[len(wordBuf):]); err != nil { - return wordBuf, false, fmt.Errorf("read word (%d %d): %w", ws, len(buf[len(wordBuf):]), err) - } - var numBuf [10]byte - n := binary.PutUvarint(numBuf[:], ws) - cf.txRemaining -= uint64(n) + ws - return buf, true, nil -} - -func (cf *ChangeFile) deleteFile() error { - if err := os.Remove(cf.path); err != nil { - return err - } - if err := os.Remove(cf.pathTx); err != nil { - return err - } - return nil -} - -type Changes struct { - namebase string - dir string - keys ChangeFile - before ChangeFile - after ChangeFile - step uint64 - beforeOn bool -} - -func (c *Changes) Init(namebase string, step uint64, dir string, beforeOn bool) { - c.namebase = namebase - c.step = step - c.dir = dir - c.keys.namebase = namebase + ".keys" - c.keys.dir = dir - c.keys.step = step - c.before.namebase = namebase + ".before" - c.before.dir = dir - c.before.step = step - c.after.namebase = namebase + ".after" - c.after.dir = dir - c.after.step = step - c.beforeOn = beforeOn -} - -func (c *Changes) closeFiles() error { - if err := c.keys.closeFile(); err != nil { - return err - } - if c.beforeOn { - if err := c.before.closeFile(); err != nil { - return err - } - } - if err := c.after.closeFile(); err != nil { - return err - } - return nil -} - -func (c *Changes) openFiles(blockNum uint64, write bool) error { - if err := c.keys.openFile(blockNum, write); err != nil { - return err - } - if c.beforeOn { - if err := c.before.openFile(blockNum, write); err != nil { - return err - } - } - if err := c.after.openFile(blockNum, write); err != nil { - return err - } - return nil -} - -func (c *Changes) insert(key, after []byte) { - c.keys.add(key) - if c.beforeOn { - c.before.add(nil) - } - c.after.add(after) -} - -func (c *Changes) update(key, before, after []byte) { - 
c.keys.add(key) - if c.beforeOn { - c.before.add(before) - } - c.after.add(after) -} - -func (c *Changes) delete(key, before []byte) { - c.keys.add(key) - if c.beforeOn { - c.before.add(before) - } - c.after.add(nil) -} - -func (c *Changes) finish(txNum uint64) error { - if err := c.keys.finish(txNum); err != nil { - return err - } - if c.beforeOn { - if err := c.before.finish(txNum); err != nil { - return err - } - } - if err := c.after.finish(txNum); err != nil { - return err - } - return nil -} - -func (c *Changes) nextTx() (bool, uint64, error) { - bkeys, err := c.keys.nextTx() - if err != nil { - return false, 0, err - } - var bbefore, bafter bool - if c.beforeOn { - if bbefore, err = c.before.nextTx(); err != nil { - return false, 0, err - } - } - if bafter, err = c.after.nextTx(); err != nil { - return false, 0, err - } - if c.beforeOn && bkeys != bbefore { - return false, 0, fmt.Errorf("inconsistent tx iteration") - } - if bkeys != bafter { - return false, 0, fmt.Errorf("inconsistent tx iteration") - } - txNum := c.keys.txNum - if c.beforeOn { - if txNum != c.before.txNum { - return false, 0, fmt.Errorf("inconsistent txNum, keys: %d, before: %d", txNum, c.before.txNum) - } - } - if txNum != c.after.txNum { - return false, 0, fmt.Errorf("inconsistent txNum, keys: %d, after: %d", txNum, c.after.txNum) - } - return bkeys, txNum, nil -} - -func (c *Changes) rewind() error { - if err := c.keys.rewind(); err != nil { - return err - } - if c.beforeOn { - if err := c.before.rewind(); err != nil { - return err - } - } - if err := c.after.rewind(); err != nil { - return err - } - return nil -} - -func (c *Changes) nextTriple(keyBuf, beforeBuf, afterBuf []byte) ([]byte, []byte, []byte, bool, error) { - key, bkeys, err := c.keys.nextWord(keyBuf) - if err != nil { - return keyBuf, beforeBuf, afterBuf, false, fmt.Errorf("next key: %w", err) - } - var before, after []byte - var bbefore, bafter bool - if c.beforeOn { - if before, bbefore, err = c.before.nextWord(beforeBuf); err != nil { - return keyBuf, beforeBuf, afterBuf, false, fmt.Errorf("next before: %w", err) - } - } - if c.beforeOn && bkeys != bbefore { - return keyBuf, beforeBuf, afterBuf, false, fmt.Errorf("inconsistent word iteration") - } - if after, bafter, err = c.after.nextWord(afterBuf); err != nil { - return keyBuf, beforeBuf, afterBuf, false, fmt.Errorf("next after: %w", err) - } - if bkeys != bafter { - return keyBuf, beforeBuf, afterBuf, false, fmt.Errorf("inconsistent word iteration") - } - return key, before, after, bkeys, nil -} - -func (c *Changes) deleteFiles() error { - if err := c.keys.deleteFile(); err != nil { - return err - } - if c.beforeOn { - if err := c.before.deleteFile(); err != nil { - return err - } - } - if err := c.after.deleteFile(); err != nil { - return err - } - return nil -} - -func buildIndex(d *compress.Decompressor, idxPath, tmpDir string, count int) (*recsplit.Index, error) { - var rs *recsplit.RecSplit - var err error - if rs, err = recsplit.NewRecSplit(recsplit.RecSplitArgs{ - KeyCount: count, - Enums: false, - BucketSize: 2000, - LeafSize: 8, - TmpDir: tmpDir, - IndexFile: idxPath, - EtlBufLimit: etl.BufferOptimalSize / 2, - }); err != nil { - return nil, err - } - defer rs.Close() - rs.LogLvl(log.LvlDebug) - - word := make([]byte, 0, 256) - var pos uint64 - g := d.MakeGetter() - for { - g.Reset(0) - for g.HasNext() { - word, _ = g.Next(word[:0]) - if err = rs.AddKey(word, pos); err != nil { - return nil, err - } - // Skip value - pos = g.Skip() - } - if err = rs.Build(); err != nil { - if 
rs.Collision() { - log.Info("Building recsplit. Collision happened. It's ok. Restarting...") - rs.ResetNextSalt() - } else { - return nil, err - } - } else { - break - } - } - var idx *recsplit.Index - if idx, err = recsplit.OpenIndex(idxPath); err != nil { - return nil, err - } - return idx, nil -} - -// aggregate gathers changes from the changefiles into a B-tree, and "removes" them from the database -// This function is time-critical because it needs to be run in the same go-routine (thread) as the general -// execution (due to read-write tx). After that, we can optimistically execute the rest in the background -func (c *Changes) aggregate(blockFrom, blockTo uint64, prefixLen int, tx kv.RwTx, table string, commitMerger commitmentMerger) (*btree.BTreeG[*AggregateItem], error) { - if err := c.openFiles(blockTo, false /* write */); err != nil { - return nil, fmt.Errorf("open files: %w", err) - } - bt := btree.NewG[*AggregateItem](32, AggregateItemLess) - err := c.aggregateToBtree(bt, prefixLen, commitMerger) - if err != nil { - return nil, fmt.Errorf("aggregateToBtree: %w", err) - } - // Clean up the DB table - var e error - bt.Ascend(func(item *AggregateItem) bool { - if item.count == 0 { - return true - } - dbPrefix := item.k - prevV, err := tx.GetOne(table, dbPrefix) - if err != nil { - e = err - return false - } - if prevV == nil { - e = fmt.Errorf("record not found in db for %s key %x", table, dbPrefix) - return false - } - - prevNum := binary.BigEndian.Uint32(prevV[:4]) - if prevNum < item.count { - e = fmt.Errorf("record count too low for %s key %s count %d, subtracting %d", table, dbPrefix, prevNum, item.count) - return false - } - if prevNum == item.count { - if e = tx.Delete(table, dbPrefix); e != nil { - return false - } - } else { - v := make([]byte, len(prevV)) - binary.BigEndian.PutUint32(v[:4], prevNum-item.count) - copy(v[4:], prevV[4:]) - - if e = tx.Put(table, dbPrefix, v); e != nil { - return false - } - } - return true - }) - if e != nil { - return nil, fmt.Errorf("clean up table %s after aggregation: %w", table, e) - } - return bt, nil -} - -func (a *Aggregator) updateArch(bt *btree.BTreeG[*AggregateItem], fType FileType, blockNum32 uint32) { - arch := a.arches[fType] - h := a.archHasher - n := uint64(len(arch)) - if n == 0 { - return - } - bt.Ascend(func(item *AggregateItem) bool { - if item.count == 0 { - return true - } - h.Reset() - h.Write(item.k) //nolint:errcheck - p, _ := h.Sum128() - p = p % n - v := atomic.LoadUint32(&arch[p]) - if v < blockNum32 { - //fmt.Printf("Updated %s arch [%x]=%d %d\n", fType.String(), item.k, p, blockNum32) - atomic.StoreUint32(&arch[p], blockNum32) - } - return true - }) -} - -type AggregateItem struct { - k, v []byte - count uint32 -} - -func AggregateItemLess(a, than *AggregateItem) bool { return bytes.Compare(a.k, than.k) < 0 } -func (i *AggregateItem) Less(than btree.Item) bool { - return bytes.Compare(i.k, than.(*AggregateItem).k) < 0 -} - -func (c *Changes) produceChangeSets(blockFrom, blockTo uint64, historyType, bitmapType FileType) (*compress.Decompressor, *recsplit.Index, *compress.Decompressor, *recsplit.Index, error) { - chsetDatPath := filepath.Join(c.dir, fmt.Sprintf("%s.%d-%d.dat", historyType.String(), blockFrom, blockTo)) - chsetIdxPath := filepath.Join(c.dir, fmt.Sprintf("%s.%d-%d.idx", historyType.String(), blockFrom, blockTo)) - bitmapDatPath := filepath.Join(c.dir, fmt.Sprintf("%s.%d-%d.dat", bitmapType.String(), blockFrom, blockTo)) - bitmapIdxPath := filepath.Join(c.dir, fmt.Sprintf("%s.%d-%d.idx", 
bitmapType.String(), blockFrom, blockTo)) - var blockSuffix [8]byte - binary.BigEndian.PutUint64(blockSuffix[:], blockTo) - bitmaps := map[string]*roaring64.Bitmap{} - comp, err := compress.NewCompressor(context.Background(), AggregatorPrefix, chsetDatPath, c.dir, compress.MinPatternScore, 1, log.LvlDebug) - if err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets NewCompressor: %w", err) - } - defer func() { - if comp != nil { - comp.Close() - } - }() - var totalRecords int - var b bool - var e error - var txNum uint64 - var key, before, after []byte - if err = c.rewind(); err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets rewind: %w", err) - } - var txKey = make([]byte, 8, 60) - for b, txNum, e = c.nextTx(); b && e == nil; b, txNum, e = c.nextTx() { - binary.BigEndian.PutUint64(txKey[:8], txNum) - for key, before, after, b, e = c.nextTriple(key[:0], before[:0], after[:0]); b && e == nil; key, before, after, b, e = c.nextTriple(key[:0], before[:0], after[:0]) { - totalRecords++ - txKey = append(txKey[:8], key...) - // In the inital files and most merged file, the txKey is added to the file, but it gets removed in the final merge - if err = comp.AddUncompressedWord(txKey); err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets AddWord key: %w", err) - } - if err = comp.AddUncompressedWord(before); err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets AddWord before: %w", err) - } - //if historyType == AccountHistory { - // fmt.Printf("produce %s.%d-%d [%x]=>[%x]\n", historyType.String(), blockFrom, blockTo, txKey, before) - //} - var bitmap *roaring64.Bitmap - var ok bool - if bitmap, ok = bitmaps[string(key)]; !ok { - bitmap = roaring64.New() - bitmaps[string(key)] = bitmap - } - bitmap.Add(txNum) - } - if e != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets nextTriple: %w", e) - } - } - if e != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets prevTx: %w", e) - } - if err = comp.Compress(); err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets Compress: %w", err) - } - comp.Close() - comp = nil - var d *compress.Decompressor - var index *recsplit.Index - if d, err = compress.NewDecompressor(chsetDatPath); err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets changeset decompressor: %w", err) - } - if index, err = buildIndex(d, chsetIdxPath, c.dir, totalRecords); err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets changeset buildIndex: %w", err) - } - // Create bitmap files - bitmapC, err := compress.NewCompressor(context.Background(), AggregatorPrefix, bitmapDatPath, c.dir, compress.MinPatternScore, 1, log.LvlDebug) - if err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets bitmap NewCompressor: %w", err) - } - defer func() { - if bitmapC != nil { - bitmapC.Close() - } - }() - idxKeys := make([]string, len(bitmaps)) - i := 0 - var buf []byte - for key := range bitmaps { - idxKeys[i] = key - i++ - } - slices.Sort(idxKeys) - for _, key := range idxKeys { - if err = bitmapC.AddUncompressedWord([]byte(key)); err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets bitmap add key: %w", err) - } - bitmap := bitmaps[key] - ef := eliasfano32.NewEliasFano(bitmap.GetCardinality(), bitmap.Maximum()) - it := bitmap.Iterator() - for it.HasNext() { - v := it.Next() - ef.AddOffset(v) - } - ef.Build() - buf = ef.AppendBytes(buf[:0]) - if err = bitmapC.AddUncompressedWord(buf); err != nil { - return nil, nil, 
nil, nil, fmt.Errorf("produceChangeSets bitmap add val: %w", err) - } - } - if err = bitmapC.Compress(); err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets bitmap Compress: %w", err) - } - bitmapC.Close() - bitmapC = nil - bitmapD, err := compress.NewDecompressor(bitmapDatPath) - if err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets bitmap decompressor: %w", err) - } - - bitmapI, err := buildIndex(bitmapD, bitmapIdxPath, c.dir, len(idxKeys)) - if err != nil { - return nil, nil, nil, nil, fmt.Errorf("produceChangeSets bitmap buildIndex: %w", err) - } - return d, index, bitmapD, bitmapI, nil -} - -// aggregateToBtree iterates over all available changes in the change files covered by this instance `c` -// (there are 3 of them, one for "keys", one for values "before" every change, and one for values "after" every change) -// and create a B-tree where each key is only represented once, with the value corresponding to the "after" value -// of the latest change. -func (c *Changes) aggregateToBtree(bt *btree.BTreeG[*AggregateItem], prefixLen int, commitMerge commitmentMerger) error { - var b bool - var e error - var key, before, after []byte - var ai AggregateItem - var prefix []byte - // Note that the following loop iterates over transactions forwards, therefore it replace entries in the B-tree - for b, _, e = c.nextTx(); b && e == nil; b, _, e = c.nextTx() { - // Within each transaction, keys are unique, but they can appear in any order - for key, before, after, b, e = c.nextTriple(key[:0], before[:0], after[:0]); b && e == nil; key, before, after, b, e = c.nextTriple(key[:0], before[:0], after[:0]) { - if prefixLen > 0 && !bytes.Equal(prefix, key[:prefixLen]) { - prefix = common.Copy(key[:prefixLen]) - item := &AggregateItem{k: prefix, count: 0} - bt.ReplaceOrInsert(item) - } - - ai.k = key - i, ok := bt.Get(&ai) - if !ok || i == nil { - item := &AggregateItem{k: common.Copy(key), v: common.Copy(after), count: 1} - bt.ReplaceOrInsert(item) - continue - } - - item := i - if commitMerge != nil { - mergedVal, err := commitMerge(item.v, after, nil) - if err != nil { - return fmt.Errorf("merge branches (%T) : %w", commitMerge, err) - } - //fmt.Printf("aggregateToBtree prefix [%x], [%x]+[%x]=>[%x]\n", commitment.CompactToHex(key), after, item.v, mergedVal) - item.v = mergedVal - } else { - item.v = common.Copy(after) - } - item.count++ - } - if e != nil { - return fmt.Errorf("aggregateToBtree nextTriple: %w", e) - } - } - if e != nil { - return fmt.Errorf("aggregateToBtree prevTx: %w", e) - } - return nil -} - -const AggregatorPrefix = "aggregator" - -func btreeToFile(bt *btree.BTreeG[*AggregateItem], datPath, tmpdir string, trace bool, workers int) (int, error) { - comp, err := compress.NewCompressor(context.Background(), AggregatorPrefix, datPath, tmpdir, compress.MinPatternScore, workers, log.LvlDebug) - if err != nil { - return 0, err - } - defer comp.Close() - comp.SetTrace(trace) - count := 0 - bt.Ascend(func(item *AggregateItem) bool { - //fmt.Printf("btreeToFile %s [%x]=>[%x]\n", datPath, item.k, item.v) - if err = comp.AddUncompressedWord(item.k); err != nil { - return false - } - count++ // Only counting keys, not values - if err = comp.AddUncompressedWord(item.v); err != nil { - return false - } - return true - }) - if err != nil { - return 0, err - } - if err = comp.Compress(); err != nil { - return 0, err - } - return count, nil -} - -type ChangesItem struct { - endBlock uint64 - startBlock uint64 - fileCount int -} - -func (i *ChangesItem) 
Less(than btree.Item) bool { - if i.endBlock == than.(*ChangesItem).endBlock { - // Larger intevals will come last - return i.startBlock > than.(*ChangesItem).startBlock - } - return i.endBlock < than.(*ChangesItem).endBlock -} - -type byEndBlockItem struct { - decompressor *compress.Decompressor - getter *compress.Getter // reader for the decompressor - getterMerge *compress.Getter // reader for the decompressor used in the background merge thread - index *recsplit.Index - indexReader *recsplit.IndexReader // reader for the index - readerMerge *recsplit.IndexReader // index reader for the background merge thread - tree *btree.BTreeG[*AggregateItem] // Substitute for decompressor+index combination - startBlock uint64 - endBlock uint64 -} - -func ByEndBlockItemLess(i, than *byEndBlockItem) bool { - if i.endBlock == than.endBlock { - return i.startBlock > than.startBlock - } - return i.endBlock < than.endBlock -} - -func (i *byEndBlockItem) Less(than btree.Item) bool { - if i.endBlock == than.(*byEndBlockItem).endBlock { - return i.startBlock > than.(*byEndBlockItem).startBlock - } - return i.endBlock < than.(*byEndBlockItem).endBlock -} - -func (a *Aggregator) scanStateFiles(files []fs.DirEntry) { - typeStrings := make([]string, NumberOfTypes) - for fType := FileType(0); fType < NumberOfTypes; fType++ { - typeStrings[fType] = fType.String() - } - re := regexp.MustCompile("^(" + strings.Join(typeStrings, "|") + ").([0-9]+)-([0-9]+).(dat|idx)$") - var err error - for _, f := range files { - name := f.Name() - subs := re.FindStringSubmatch(name) - if len(subs) != 5 { - if len(subs) != 0 { - log.Warn("File ignored by aggregator, more than 4 submatches", "name", name, "submatches", len(subs)) - } - continue - } - var startBlock, endBlock uint64 - if startBlock, err = strconv.ParseUint(subs[2], 10, 64); err != nil { - log.Warn("File ignored by aggregator, parsing startBlock", "error", err, "name", name) - continue - } - if endBlock, err = strconv.ParseUint(subs[3], 10, 64); err != nil { - log.Warn("File ignored by aggregator, parsing endBlock", "error", err, "name", name) - continue - } - if startBlock > endBlock { - log.Warn("File ignored by aggregator, startBlock > endBlock", "name", name) - continue - } - fType, ok := ParseFileType(subs[1]) - if !ok { - log.Warn("File ignored by aggregator, type unknown", "type", subs[1]) - } - var item = &byEndBlockItem{startBlock: startBlock, endBlock: endBlock} - var foundI *byEndBlockItem - a.files[fType].AscendGreaterOrEqual(&byEndBlockItem{startBlock: endBlock, endBlock: endBlock}, func(i btree.Item) bool { - it := i.(*byEndBlockItem) - if it.endBlock == endBlock { - foundI = it - } - return false - }) - if foundI == nil || foundI.startBlock > startBlock { - log.Info("Load state file", "name", name, "type", fType.String(), "startBlock", startBlock, "endBlock", endBlock) - a.files[fType].ReplaceOrInsert(item) - } - } -} - -func NewAggregator(diffDir string, unwindLimit uint64, aggregationStep uint64, changesets, commitments bool, minArch uint64, trie commitment.Trie, tx kv.RwTx) (*Aggregator, error) { - a := &Aggregator{ - diffDir: diffDir, - unwindLimit: unwindLimit, - aggregationStep: aggregationStep, - tracedKeys: map[string]struct{}{}, - keccak: sha3.NewLegacyKeccak256(), - hph: trie, - aggChannel: make(chan *AggregationTask, 1024), - aggError: make(chan error, 1), - mergeChannel: make(chan struct{}, 1), - mergeError: make(chan error, 1), - historyChannel: make(chan struct{}, 1), - historyError: make(chan error, 1), - changesets: changesets, - 
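		// A hedged reading of the two flags as they are used later in this file: changesets
		// gates the production of per-type history and bitmap change-set files (and the
		// backgroundHistoryMerge goroutine), while commitments decides whether the Commitment
		// file type is maintained (typesLimit toggles between Commitment and AccountHistory).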
commitments: commitments, - archHasher: murmur3.New128WithSeed(0), // TODO: Randomise salt - } - for fType := FirstType; fType < NumberOfTypes; fType++ { - a.files[fType] = btree.New(32) - } - var closeStateFiles = true // It will be set to false in case of success at the end of the function - defer func() { - // Clean up all decompressor and indices upon error - if closeStateFiles { - a.Close() - } - }() - // Scan the diff directory and create the mapping of end blocks to files - files, err := os.ReadDir(diffDir) - if err != nil { - return nil, err - } - a.scanStateFiles(files) - // Check for overlaps and holes - for fType := FirstType; fType < NumberOfTypes; fType++ { - if err := checkOverlaps(fType.String(), a.files[fType]); err != nil { - return nil, err - } - } - // Open decompressor and index files for all items in state trees - for fType := FirstType; fType < NumberOfTypes; fType++ { - if err := a.openFiles(fType, minArch); err != nil { - return nil, fmt.Errorf("opening %s state files: %w", fType.String(), err) - } - } - a.changesBtree = btree.New(32) - re := regexp.MustCompile(`^(account|storage|code|commitment).(keys|before|after).([0-9]+)-([0-9]+).chg$`) - for _, f := range files { - name := f.Name() - subs := re.FindStringSubmatch(name) - if len(subs) != 5 { - if len(subs) != 0 { - log.Warn("File ignored by changes scan, more than 4 submatches", "name", name, "submatches", len(subs)) - } - continue - } - var startBlock, endBlock uint64 - if startBlock, err = strconv.ParseUint(subs[3], 10, 64); err != nil { - log.Warn("File ignored by changes scan, parsing startBlock", "error", err, "name", name) - continue - } - if endBlock, err = strconv.ParseUint(subs[4], 10, 64); err != nil { - log.Warn("File ignored by changes scan, parsing endBlock", "error", err, "name", name) - continue - } - if startBlock > endBlock { - log.Warn("File ignored by changes scan, startBlock > endBlock", "name", name) - continue - } - if endBlock != startBlock+aggregationStep-1 { - log.Warn("File ignored by changes scan, endBlock != startBlock+aggregationStep-1", "name", name) - continue - } - var item = &ChangesItem{fileCount: 1, startBlock: startBlock, endBlock: endBlock} - i := a.changesBtree.Get(item) - if i == nil { - a.changesBtree.ReplaceOrInsert(item) - } else { - item = i.(*ChangesItem) - if item.startBlock == startBlock { - item.fileCount++ - } else { - return nil, fmt.Errorf("change files overlap [%d-%d] with [%d-%d]", item.startBlock, item.endBlock, startBlock, endBlock) - } - } - } - // Check for holes in change files - minStart := uint64(math.MaxUint64) - a.changesBtree.Descend(func(i btree.Item) bool { - item := i.(*ChangesItem) - if item.startBlock < minStart { - if item.endBlock >= minStart { - err = fmt.Errorf("overlap of change files [%d-%d] with %d", item.startBlock, item.endBlock, minStart) - return false - } - if minStart != math.MaxUint64 && item.endBlock+1 != minStart { - err = fmt.Errorf("whole in change files [%d-%d]", item.endBlock, minStart) - return false - } - minStart = item.startBlock - } else { - err = fmt.Errorf("overlap of change files [%d-%d] with %d", item.startBlock, item.endBlock, minStart) - return false - } - return true - }) - if err != nil { - return nil, err - } - for fType := FirstType; fType < NumberOfStateTypes; fType++ { - if err = checkOverlapWithMinStart(fType.String(), a.files[fType], minStart); err != nil { - return nil, err - } - } - if err = a.rebuildRecentState(tx); err != nil { - return nil, fmt.Errorf("rebuilding recent state from change files: %w", 
err) - } - closeStateFiles = false - a.aggWg.Add(1) - go a.backgroundAggregation() - a.mergeWg.Add(1) - go a.backgroundMerge() - if a.changesets { - a.historyWg.Add(1) - go a.backgroundHistoryMerge() - } - return a, nil -} - -// rebuildRecentState reads change files and reconstructs the recent state -func (a *Aggregator) rebuildRecentState(tx kv.RwTx) error { - t := time.Now() - var err error - trees := map[FileType]*btree.BTreeG[*AggregateItem]{} - - a.changesBtree.Ascend(func(i btree.Item) bool { - item := i.(*ChangesItem) - for fType := FirstType; fType < NumberOfStateTypes; fType++ { - tree, ok := trees[fType] - if !ok { - tree = btree.NewG[*AggregateItem](32, AggregateItemLess) - trees[fType] = tree - } - var changes Changes - changes.Init(fType.String(), a.aggregationStep, a.diffDir, false /* beforeOn */) - if err = changes.openFiles(item.startBlock, false /* write */); err != nil { - return false - } - var prefixLen int - if fType == Storage { - prefixLen = length.Addr - } - - var commitMerger commitmentMerger - if fType == Commitment { - commitMerger = mergeCommitments - } - - if err = changes.aggregateToBtree(tree, prefixLen, commitMerger); err != nil { - return false - } - if err = changes.closeFiles(); err != nil { - return false - } - } - return true - }) - if err != nil { - return err - } - for fType, tree := range trees { - table := fType.Table() - tree.Ascend(func(item *AggregateItem) bool { - if len(item.v) == 0 { - return true - } - var v []byte - if v, err = tx.GetOne(table, item.k); err != nil { - return false - } - if item.count != binary.BigEndian.Uint32(v[:4]) { - err = fmt.Errorf("mismatched count for %x: change file %d, db: %d", item.k, item.count, binary.BigEndian.Uint32(v[:4])) - return false - } - if !bytes.Equal(item.v, v[4:]) { - err = fmt.Errorf("mismatched v for %x: change file [%x], db: [%x]", item.k, item.v, v[4:]) - return false - } - return true - }) - } - if err != nil { - return err - } - log.Info("reconstructed recent state", "in", time.Since(t)) - return nil -} - -type AggregationTask struct { - bt [NumberOfStateTypes]*btree.BTreeG[*AggregateItem] - changes [NumberOfStateTypes]Changes - blockFrom uint64 - blockTo uint64 -} - -func (a *Aggregator) removeLocked(fType FileType, toRemove []*byEndBlockItem, item *byEndBlockItem) { - a.fileLocks[fType].Lock() - defer a.fileLocks[fType].Unlock() - if len(toRemove) > 1 { - for _, ag := range toRemove { - a.files[fType].Delete(ag) - } - a.files[fType].ReplaceOrInsert(item) - } -} - -func (a *Aggregator) removeLockedState( - accountsToRemove []*byEndBlockItem, accountsItem *byEndBlockItem, - codeToRemove []*byEndBlockItem, codeItem *byEndBlockItem, - storageToRemove []*byEndBlockItem, storageItem *byEndBlockItem, - commitmentToRemove []*byEndBlockItem, commitmentItem *byEndBlockItem, -) { - for fType := FirstType; fType < NumberOfStateTypes; fType++ { - a.fileLocks[fType].Lock() - defer a.fileLocks[fType].Unlock() - } - if len(accountsToRemove) > 1 { - for _, ag := range accountsToRemove { - a.files[Account].Delete(ag) - } - a.files[Account].ReplaceOrInsert(accountsItem) - } - if len(codeToRemove) > 1 { - for _, ag := range codeToRemove { - a.files[Code].Delete(ag) - } - a.files[Code].ReplaceOrInsert(codeItem) - } - if len(storageToRemove) > 1 { - for _, ag := range storageToRemove { - a.files[Storage].Delete(ag) - } - a.files[Storage].ReplaceOrInsert(storageItem) - } - if len(commitmentToRemove) > 1 { - for _, ag := range commitmentToRemove { - a.files[Commitment].Delete(ag) - } - 
a.files[Commitment].ReplaceOrInsert(commitmentItem) - } -} - -func removeFiles(fType FileType, diffDir string, toRemove []*byEndBlockItem) error { - // Close all the memory maps etc - for _, ag := range toRemove { - if err := ag.index.Close(); err != nil { - return fmt.Errorf("close index: %w", err) - } - if err := ag.decompressor.Close(); err != nil { - return fmt.Errorf("close decompressor: %w", err) - } - } - // Delete files - // TODO: in a non-test version, this is delayed to allow other participants to roll over to the next file - for _, ag := range toRemove { - if err := os.Remove(path.Join(diffDir, fmt.Sprintf("%s.%d-%d.dat", fType.String(), ag.startBlock, ag.endBlock))); err != nil { - return fmt.Errorf("remove decompressor file %s.%d-%d.dat: %w", fType.String(), ag.startBlock, ag.endBlock, err) - } - if err := os.Remove(path.Join(diffDir, fmt.Sprintf("%s.%d-%d.idx", fType.String(), ag.startBlock, ag.endBlock))); err != nil { - return fmt.Errorf("remove index file %s.%d-%d.idx: %w", fType.String(), ag.startBlock, ag.endBlock, err) - } - } - return nil -} - -// backgroundAggregation is the functin that runs in a background go-routine and performs creation of initial state files -// allowing the main goroutine to proceed -func (a *Aggregator) backgroundAggregation() { - defer a.aggWg.Done() - for aggTask := range a.aggChannel { - if a.changesets { - if historyD, historyI, bitmapD, bitmapI, err := aggTask.changes[Account].produceChangeSets(aggTask.blockFrom, aggTask.blockTo, AccountHistory, AccountBitmap); err == nil { - var historyItem = &byEndBlockItem{startBlock: aggTask.blockFrom, endBlock: aggTask.blockTo} - historyItem.decompressor = historyD - historyItem.index = historyI - historyItem.getter = historyItem.decompressor.MakeGetter() - historyItem.getterMerge = historyItem.decompressor.MakeGetter() - historyItem.indexReader = recsplit.NewIndexReader(historyItem.index) - historyItem.readerMerge = recsplit.NewIndexReader(historyItem.index) - a.addLocked(AccountHistory, historyItem) - var bitmapItem = &byEndBlockItem{startBlock: aggTask.blockFrom, endBlock: aggTask.blockTo} - bitmapItem.decompressor = bitmapD - bitmapItem.index = bitmapI - bitmapItem.getter = bitmapItem.decompressor.MakeGetter() - bitmapItem.getterMerge = bitmapItem.decompressor.MakeGetter() - bitmapItem.indexReader = recsplit.NewIndexReader(bitmapItem.index) - bitmapItem.readerMerge = recsplit.NewIndexReader(bitmapItem.index) - a.addLocked(AccountBitmap, bitmapItem) - } else { - a.aggError <- fmt.Errorf("produceChangeSets %s: %w", Account.String(), err) - return - } - if historyD, historyI, bitmapD, bitmapI, err := aggTask.changes[Storage].produceChangeSets(aggTask.blockFrom, aggTask.blockTo, StorageHistory, StorageBitmap); err == nil { - var historyItem = &byEndBlockItem{startBlock: aggTask.blockFrom, endBlock: aggTask.blockTo} - historyItem.decompressor = historyD - historyItem.index = historyI - historyItem.getter = historyItem.decompressor.MakeGetter() - historyItem.getterMerge = historyItem.decompressor.MakeGetter() - historyItem.indexReader = recsplit.NewIndexReader(historyItem.index) - historyItem.readerMerge = recsplit.NewIndexReader(historyItem.index) - a.addLocked(StorageHistory, historyItem) - var bitmapItem = &byEndBlockItem{startBlock: aggTask.blockFrom, endBlock: aggTask.blockTo} - bitmapItem.decompressor = bitmapD - bitmapItem.index = bitmapI - bitmapItem.getter = bitmapItem.decompressor.MakeGetter() - bitmapItem.getterMerge = bitmapItem.decompressor.MakeGetter() - bitmapItem.indexReader = 
recsplit.NewIndexReader(bitmapItem.index) - bitmapItem.readerMerge = recsplit.NewIndexReader(bitmapItem.index) - a.addLocked(StorageBitmap, bitmapItem) - } else { - a.aggError <- fmt.Errorf("produceChangeSets %s: %w", Storage.String(), err) - return - } - if historyD, historyI, bitmapD, bitmapI, err := aggTask.changes[Code].produceChangeSets(aggTask.blockFrom, aggTask.blockTo, CodeHistory, CodeBitmap); err == nil { - var historyItem = &byEndBlockItem{startBlock: aggTask.blockFrom, endBlock: aggTask.blockTo} - historyItem.decompressor = historyD - historyItem.index = historyI - historyItem.getter = historyItem.decompressor.MakeGetter() - historyItem.getterMerge = historyItem.decompressor.MakeGetter() - historyItem.indexReader = recsplit.NewIndexReader(historyItem.index) - historyItem.readerMerge = recsplit.NewIndexReader(historyItem.index) - a.addLocked(CodeHistory, historyItem) - var bitmapItem = &byEndBlockItem{startBlock: aggTask.blockFrom, endBlock: aggTask.blockTo} - bitmapItem.decompressor = bitmapD - bitmapItem.index = bitmapI - bitmapItem.getter = bitmapItem.decompressor.MakeGetter() - bitmapItem.getterMerge = bitmapItem.decompressor.MakeGetter() - bitmapItem.indexReader = recsplit.NewIndexReader(bitmapItem.index) - bitmapItem.readerMerge = recsplit.NewIndexReader(bitmapItem.index) - a.addLocked(CodeBitmap, bitmapItem) - } else { - a.aggError <- fmt.Errorf("produceChangeSets %s: %w", Code.String(), err) - return - } - } - typesLimit := Commitment - if a.commitments { - typesLimit = AccountHistory - } - for fType := FirstType; fType < typesLimit; fType++ { - var err error - if err = aggTask.changes[fType].closeFiles(); err != nil { - a.aggError <- fmt.Errorf("close %sChanges: %w", fType.String(), err) - return - } - var item = &byEndBlockItem{startBlock: aggTask.blockFrom, endBlock: aggTask.blockTo} - if item.decompressor, item.index, err = createDatAndIndex(fType.String(), a.diffDir, aggTask.bt[fType], aggTask.blockFrom, aggTask.blockTo); err != nil { - a.aggError <- fmt.Errorf("createDatAndIndex %s: %w", fType.String(), err) - return - } - item.getter = item.decompressor.MakeGetter() - item.getterMerge = item.decompressor.MakeGetter() - item.indexReader = recsplit.NewIndexReader(item.index) - item.readerMerge = recsplit.NewIndexReader(item.index) - if err = aggTask.changes[fType].deleteFiles(); err != nil { - a.aggError <- fmt.Errorf("delete %sChanges: %w", fType.String(), err) - return - } - a.addLocked(fType, item) - } - // At this point, 3 new state files (containing latest changes) has been created for accounts, code, and storage - // Corresponding items has been added to the registy of state files, and B-tree are not necessary anymore, change files can be removed - // What follows can be performed by the 2nd background goroutine - select { - case a.mergeChannel <- struct{}{}: - default: - } - select { - case a.historyChannel <- struct{}{}: - default: - } - } -} - -type CommitmentValTransform struct { - pre [NumberOfAccountStorageTypes][]*byEndBlockItem // List of state files before the merge - post [NumberOfAccountStorageTypes][]*byEndBlockItem // List of state files after the merge -} - -func decodeU64(from []byte) uint64 { - var i uint64 - for _, b := range from { - i = (i << 8) | uint64(b) - } - return i -} - -func encodeU64(i uint64, to []byte) []byte { - // writes i to b in big endian byte order, using the least number of bytes needed to represent i. 
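	// For example, encodeU64(300, nil) yields []byte{0x01, 0x2c}, and decodeU64 above folds
	// those two bytes back into 300; the pair round-trips values in the shortest big-endian form.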
- switch { - case i < (1 << 8): - return append(to, byte(i)) - case i < (1 << 16): - return append(to, byte(i>>8), byte(i)) - case i < (1 << 24): - return append(to, byte(i>>16), byte(i>>8), byte(i)) - case i < (1 << 32): - return append(to, byte(i>>24), byte(i>>16), byte(i>>8), byte(i)) - case i < (1 << 40): - return append(to, byte(i>>32), byte(i>>24), byte(i>>16), byte(i>>8), byte(i)) - case i < (1 << 48): - return append(to, byte(i>>40), byte(i>>32), byte(i>>24), byte(i>>16), byte(i>>8), byte(i)) - case i < (1 << 56): - return append(to, byte(i>>48), byte(i>>40), byte(i>>32), byte(i>>24), byte(i>>16), byte(i>>8), byte(i)) - default: - return append(to, byte(i>>56), byte(i>>48), byte(i>>40), byte(i>>32), byte(i>>24), byte(i>>16), byte(i>>8), byte(i)) - } -} - -// commitmentValTransform parses the value of the commitment record to extract references -// to accounts and storage items, then looks them up in the new, merged files, and replaces them with -// the updated references -func (cvt *CommitmentValTransform) commitmentValTransform(val, transValBuf commitment.BranchData) ([]byte, error) { - if len(val) == 0 { - return transValBuf, nil - } - - accountPlainKeys, storagePlainKeys, err := val.ExtractPlainKeys() - if err != nil { - return nil, err - } - transAccountPks := make([][]byte, 0, len(accountPlainKeys)) - var apkBuf, spkBuf []byte - for _, accountPlainKey := range accountPlainKeys { - if len(accountPlainKey) == length.Addr { - // Non-optimised key originating from a database record - apkBuf = append(apkBuf[:0], accountPlainKey...) - } else { - // Optimised key referencing a state file record (file number and offset within the file) - fileI := int(accountPlainKey[0]) - offset := decodeU64(accountPlainKey[1:]) - g := cvt.pre[Account][fileI].getterMerge - g.Reset(offset) - apkBuf, _ = g.Next(apkBuf[:0]) - //fmt.Printf("replacing account [%x] from [%x]\n", apkBuf, accountPlainKey) - } - // Look up apkBuf in the post account files - for j := len(cvt.post[Account]); j > 0; j-- { - item := cvt.post[Account][j-1] - if item.index.Empty() { - continue - } - offset := item.readerMerge.Lookup(apkBuf) - g := item.getterMerge - g.Reset(offset) - if g.HasNext() { - if keyMatch, _ := g.Match(apkBuf); keyMatch { - accountPlainKey = encodeU64(offset, []byte{byte(j - 1)}) - //fmt.Printf("replaced account [%x]=>[%x] for file [%d-%d]\n", apkBuf, accountPlainKey, item.startBlock, item.endBlock) - break - } else if j == 0 { - fmt.Printf("could not find replacement key [%x], file=%s.%d-%d]\n\n", apkBuf, Account.String(), item.startBlock, item.endBlock) - } - } - } - transAccountPks = append(transAccountPks, accountPlainKey) - } - - transStoragePks := make([][]byte, 0, len(storagePlainKeys)) - for _, storagePlainKey := range storagePlainKeys { - if len(storagePlainKey) == length.Addr+length.Hash { - // Non-optimised key originating from a database record - spkBuf = append(spkBuf[:0], storagePlainKey...) 
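			// The optimised form handled in the branch below encodes one byte of file index
			// followed by encodeU64(offset); e.g. {0x02, 0x01, 0x2c} would reference offset 300
			// in the file with index 2 of the corresponding file list (a reading of the
			// encodeU64(offset, []byte{byte(j - 1)}) calls used during replacement).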
- } else { - // Optimised key referencing a state file record (file number and offset within the file) - fileI := int(storagePlainKey[0]) - offset := decodeU64(storagePlainKey[1:]) - g := cvt.pre[Storage][fileI].getterMerge - g.Reset(offset) - //fmt.Printf("offsetToKey storage [%x] offset=%d, file=%d-%d\n", storagePlainKey, offset, cvt.pre[Storage][fileI].startBlock, cvt.pre[Storage][fileI].endBlock) - spkBuf, _ = g.Next(spkBuf[:0]) - } - // Lookup spkBuf in the post storage files - for j := len(cvt.post[Storage]); j > 0; j-- { - item := cvt.post[Storage][j-1] - if item.index.Empty() { - continue - } - offset := item.readerMerge.Lookup(spkBuf) - g := item.getterMerge - g.Reset(offset) - if g.HasNext() { - if keyMatch, _ := g.Match(spkBuf); keyMatch { - storagePlainKey = encodeU64(offset, []byte{byte(j - 1)}) - //fmt.Printf("replacing storage [%x] => [fileI=%d, offset=%d, file=%s.%d-%d]\n", spkBuf, j-1, offset, Storage.String(), item.startBlock, item.endBlock) - break - } else if j == 0 { - fmt.Printf("could not find replacement key [%x], file=%s.%d-%d]\n\n", spkBuf, Storage.String(), item.startBlock, item.endBlock) - } - } - } - transStoragePks = append(transStoragePks, storagePlainKey) - } - if transValBuf, err = val.ReplacePlainKeys(transAccountPks, transStoragePks, transValBuf); err != nil { - return nil, err - } - return transValBuf, nil -} - -func (a *Aggregator) backgroundMerge() { - defer a.mergeWg.Done() - for range a.mergeChannel { - t := time.Now() - var err error - var cvt CommitmentValTransform - var toRemove [NumberOfStateTypes][]*byEndBlockItem - var newItems [NumberOfStateTypes]*byEndBlockItem - var blockFrom, blockTo uint64 - lastType := Code - typesLimit := Commitment - if a.commitments { - lastType = Commitment - typesLimit = AccountHistory - } - // Lock the set of commitment (or code if commitments are off) files - those are the smallest, because account, storage and code files may be added by the aggregation thread first - toRemove[lastType], _, _, blockFrom, blockTo = a.findLargestMerge(lastType, uint64(math.MaxUint64) /* maxBlockTo */, uint64(math.MaxUint64) /* maxSpan */) - - for fType := FirstType; fType < typesLimit; fType++ { - var pre, post []*byEndBlockItem - var from, to uint64 - if fType == lastType { - from = blockFrom - to = blockTo - } else { - toRemove[fType], pre, post, from, to = a.findLargestMerge(fType, blockTo, uint64(math.MaxUint64) /* maxSpan */) - if from != blockFrom { - a.mergeError <- fmt.Errorf("%sFrom %d != blockFrom %d", fType.String(), from, blockFrom) - return - } - if to != blockTo { - a.mergeError <- fmt.Errorf("%sTo %d != blockTo %d", fType.String(), to, blockTo) - return - } - } - if len(toRemove[fType]) > 1 { - var valTransform func(commitment.BranchData, commitment.BranchData) ([]byte, error) - var mergeFunc commitmentMerger - if fType == Commitment { - valTransform = cvt.commitmentValTransform - mergeFunc = mergeCommitments - } else { - mergeFunc = mergeReplace - } - var prefixLen int - if fType == Storage { - prefixLen = length.Addr - } - if newItems[fType], err = a.computeAggregation(fType, toRemove[fType], from, to, valTransform, mergeFunc, true /* valCompressed */, true /* withIndex */, prefixLen); err != nil { - a.mergeError <- fmt.Errorf("computeAggreation %s: %w", fType.String(), err) - return - } - post = append(post, newItems[fType]) - } - if fType < NumberOfAccountStorageTypes { - cvt.pre[fType] = pre - cvt.post[fType] = post - } - } - // Switch aggregator to new state files, close and remove old files - 
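		// Note that removeLockedState and removeFiles below only act on a type when more than
		// one source file was actually merged (len(toRemove[fType]) > 1), mirroring the guard
		// under which newItems[fType] was produced above.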
a.removeLockedState(toRemove[Account], newItems[Account], toRemove[Code], newItems[Code], toRemove[Storage], newItems[Storage], toRemove[Commitment], newItems[Commitment]) - removed := 0 - for fType := FirstType; fType < typesLimit; fType++ { - if len(toRemove[fType]) > 1 { - removeFiles(fType, a.diffDir, toRemove[fType]) - removed += len(toRemove[fType]) - 1 - } - } - mergeTime := time.Since(t) - if mergeTime > time.Minute { - log.Info("Long merge", "from", blockFrom, "to", blockTo, "files", removed, "time", time.Since(t)) - } - } -} - -func (a *Aggregator) reduceHistoryFiles(fType FileType, item *byEndBlockItem) error { - datTmpPath := filepath.Join(a.diffDir, fmt.Sprintf("%s.%d-%d.dat.tmp", fType.String(), item.startBlock, item.endBlock)) - datPath := filepath.Join(a.diffDir, fmt.Sprintf("%s.%d-%d.dat", fType.String(), item.startBlock, item.endBlock)) - idxPath := filepath.Join(a.diffDir, fmt.Sprintf("%s.%d-%d.idx", fType.String(), item.startBlock, item.endBlock)) - comp, err := compress.NewCompressor(context.Background(), AggregatorPrefix, datTmpPath, a.diffDir, compress.MinPatternScore, 1, log.LvlDebug) - if err != nil { - return fmt.Errorf("reduceHistoryFiles create compressor %s: %w", datPath, err) - } - defer comp.Close() - g := item.getter - var val []byte - var count int - g.Reset(0) - var key []byte - for g.HasNext() { - g.Skip() // Skip key on on the first pass - val, _ = g.Next(val[:0]) - //fmt.Printf("reduce1 [%s.%d-%d] [%x]=>[%x]\n", fType.String(), item.startBlock, item.endBlock, key, val) - if err = comp.AddWord(val); err != nil { - return fmt.Errorf("reduceHistoryFiles AddWord: %w", err) - } - count++ - } - if err = comp.Compress(); err != nil { - return fmt.Errorf("reduceHistoryFiles compress: %w", err) - } - var d *compress.Decompressor - if d, err = compress.NewDecompressor(datTmpPath); err != nil { - return fmt.Errorf("reduceHistoryFiles create decompressor: %w", err) - } - var rs *recsplit.RecSplit - if rs, err = recsplit.NewRecSplit(recsplit.RecSplitArgs{ - KeyCount: count, - Enums: false, - BucketSize: 2000, - LeafSize: 8, - TmpDir: a.diffDir, - IndexFile: idxPath, - }); err != nil { - return fmt.Errorf("reduceHistoryFiles NewRecSplit: %w", err) - } - rs.LogLvl(log.LvlDebug) - - g1 := d.MakeGetter() - for { - g.Reset(0) - g1.Reset(0) - var lastOffset uint64 - for g.HasNext() { - key, _ = g.Next(key[:0]) - g.Skip() // Skip value - _, pos := g1.Next(nil) - //fmt.Printf("reduce2 [%s.%d-%d] [%x]==>%d\n", fType.String(), item.startBlock, item.endBlock, key, lastOffset) - if err = rs.AddKey(key, lastOffset); err != nil { - return fmt.Errorf("reduceHistoryFiles %p AddKey: %w", rs, err) - } - lastOffset = pos - } - if err = rs.Build(); err != nil { - if rs.Collision() { - log.Info("Building reduceHistoryFiles. Collision happened. It's ok. 
Restarting...") - rs.ResetNextSalt() - } else { - return fmt.Errorf("reduceHistoryFiles Build: %w", err) - } - } else { - break - } - } - if err = item.decompressor.Close(); err != nil { - return fmt.Errorf("reduceHistoryFiles close decompressor: %w", err) - } - if err = os.Remove(datPath); err != nil { - return fmt.Errorf("reduceHistoryFiles remove: %w", err) - } - if err = os.Rename(datTmpPath, datPath); err != nil { - return fmt.Errorf("reduceHistoryFiles rename: %w", err) - } - if item.decompressor, err = compress.NewDecompressor(datPath); err != nil { - return fmt.Errorf("reduceHistoryFiles create new decompressor: %w", err) - } - item.getter = item.decompressor.MakeGetter() - item.getterMerge = item.decompressor.MakeGetter() - if item.index, err = recsplit.OpenIndex(idxPath); err != nil { - return fmt.Errorf("reduceHistoryFiles open index: %w", err) - } - item.indexReader = recsplit.NewIndexReader(item.index) - item.readerMerge = recsplit.NewIndexReader(item.index) - return nil -} - -type commitmentMerger func(prev, current, target commitment.BranchData) (commitment.BranchData, error) - -func mergeReplace(preval, val, buf commitment.BranchData) (commitment.BranchData, error) { - return append(buf, val...), nil -} - -func mergeBitmaps(preval, val, buf commitment.BranchData) (commitment.BranchData, error) { - preef, _ := eliasfano32.ReadEliasFano(preval) - ef, _ := eliasfano32.ReadEliasFano(val) - //fmt.Printf("mergeBitmaps [%x] (count=%d,max=%d) + [%x] (count=%d,max=%d)\n", preval, preef.Count(), preef.Max(), val, ef.Count(), ef.Max()) - preIt := preef.Iterator() - efIt := ef.Iterator() - newEf := eliasfano32.NewEliasFano(preef.Count()+ef.Count(), ef.Max()) - for preIt.HasNext() { - v, _ := preIt.Next() - newEf.AddOffset(v) - } - for efIt.HasNext() { - v, _ := efIt.Next() - newEf.AddOffset(v) - } - newEf.Build() - return newEf.AppendBytes(buf), nil -} - -func mergeCommitments(preval, val, buf commitment.BranchData) (commitment.BranchData, error) { - return preval.MergeHexBranches(val, buf) -} - -func (a *Aggregator) backgroundHistoryMerge() { - defer a.historyWg.Done() - for range a.historyChannel { - t := time.Now() - var err error - var toRemove [NumberOfTypes][]*byEndBlockItem - var newItems [NumberOfTypes]*byEndBlockItem - var blockFrom, blockTo uint64 - // Lock the set of commitment files - those are the smallest, because account, storage and code files may be added by the aggregation thread first - toRemove[CodeBitmap], _, _, blockFrom, blockTo = a.findLargestMerge(CodeBitmap, uint64(math.MaxUint64) /* maxBlockTo */, 500_000 /* maxSpan */) - - finalMerge := blockTo-blockFrom+1 == 500_000 - for fType := AccountHistory; fType < NumberOfTypes; fType++ { - var from, to uint64 - if fType == CodeBitmap { - from = blockFrom - to = blockTo - } else { - toRemove[fType], _, _, from, to = a.findLargestMerge(fType, blockTo, 500_000 /* maxSpan */) - if from != blockFrom { - a.historyError <- fmt.Errorf("%sFrom %d != blockFrom %d", fType.String(), from, blockFrom) - return - } - if to != blockTo { - a.historyError <- fmt.Errorf("%sTo %d != blockTo %d", fType.String(), to, blockTo) - return - } - } - if len(toRemove[fType]) > 1 { - isBitmap := fType == AccountBitmap || fType == StorageBitmap || fType == CodeBitmap - - var mergeFunc commitmentMerger - switch { - case isBitmap: - mergeFunc = mergeBitmaps - case fType == Commitment: - mergeFunc = mergeCommitments - default: - mergeFunc = mergeReplace - } - - if newItems[fType], err = a.computeAggregation(fType, toRemove[fType], from, to, nil /* 
valTransform */, mergeFunc, - !isBitmap /* valCompressed */, !finalMerge || isBitmap /* withIndex */, 0 /* prefixLen */); err != nil { - a.historyError <- fmt.Errorf("computeAggreation %s: %w", fType.String(), err) - return - } - } - } - if finalMerge { - // Special aggregation for blockTo - blockFrom + 1 == 500_000 - // Remove keys from the .dat files assuming that they will only be used after querying the bitmap index - // and therefore, there is no situation where non-existent key is queried. - if err = a.reduceHistoryFiles(AccountHistory, newItems[AccountHistory]); err != nil { - a.historyError <- fmt.Errorf("reduceHistoryFiles %s: %w", AccountHistory.String(), err) - return - } - if err = a.reduceHistoryFiles(StorageHistory, newItems[StorageHistory]); err != nil { - a.historyError <- fmt.Errorf("reduceHistoryFiles %s: %w", StorageHistory.String(), err) - return - } - if err = a.reduceHistoryFiles(CodeHistory, newItems[CodeHistory]); err != nil { - a.historyError <- fmt.Errorf("reduceHistoryFiles %s: %w", CodeHistory.String(), err) - return - } - } - for fType := AccountHistory; fType < NumberOfTypes; fType++ { - a.removeLocked(fType, toRemove[fType], newItems[fType]) - } - removed := 0 - for fType := AccountHistory; fType < NumberOfTypes; fType++ { - if len(toRemove[fType]) > 1 { - removeFiles(fType, a.diffDir, toRemove[fType]) - removed += len(toRemove[fType]) - 1 - } - } - mergeTime := time.Since(t) - if mergeTime > time.Minute { - log.Info("Long history merge", "from", blockFrom, "to", blockTo, "files", removed, "time", time.Since(t)) - } - } -} - -// checkOverlaps does not lock tree, because it is only called from the constructor of aggregator -func checkOverlaps(treeName string, tree *btree.BTree) error { - var minStart uint64 = math.MaxUint64 - var err error - tree.Descend(func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if item.startBlock < minStart { - if item.endBlock >= minStart { - err = fmt.Errorf("overlap of %s state files [%d-%d] with %d", treeName, item.startBlock, item.endBlock, minStart) - return false - } - if minStart != math.MaxUint64 && item.endBlock+1 != minStart { - err = fmt.Errorf("hole in %s state files [%d-%d]", treeName, item.endBlock, minStart) - return false - } - minStart = item.startBlock - } - return true - }) - return err -} - -func (a *Aggregator) openFiles(fType FileType, minArch uint64) error { - var err error - var totalKeys uint64 - a.files[fType].Ascend(func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if item.decompressor, err = compress.NewDecompressor(path.Join(a.diffDir, fmt.Sprintf("%s.%d-%d.dat", fType.String(), item.startBlock, item.endBlock))); err != nil { - return false - } - if item.index, err = recsplit.OpenIndex(path.Join(a.diffDir, fmt.Sprintf("%s.%d-%d.idx", fType.String(), item.startBlock, item.endBlock))); err != nil { - return false - } - totalKeys += item.index.KeyCount() - item.getter = item.decompressor.MakeGetter() - item.getterMerge = item.decompressor.MakeGetter() - item.indexReader = recsplit.NewIndexReader(item.index) - item.readerMerge = recsplit.NewIndexReader(item.index) - return true - }) - if fType >= NumberOfStateTypes { - return nil - } - log.Info("Creating arch...", "type", fType.String(), "total keys in all state files", totalKeys) - // Allocate arch of double of total keys - n := totalKeys * 2 - if n < minArch { - n = minArch - } - a.arches[fType] = make([]uint32, n) - arch := a.arches[fType] - var key []byte - h := a.archHasher - collisions := 0 - a.files[fType].Ascend(func(i btree.Item) bool 
{ - item := i.(*byEndBlockItem) - g := item.getter - g.Reset(0) - blockNum := uint32(item.endBlock) - for g.HasNext() { - key, _ = g.Next(key[:0]) - h.Reset() - h.Write(key) //nolint:errcheck - p, _ := h.Sum128() - p = p % n - if arch[p] != 0 { - collisions++ - } - arch[p] = blockNum - g.Skip() - } - return true - }) - log.Info("Created arch", "type", fType.String(), "collisions", collisions) - return err -} - -func (a *Aggregator) closeFiles(fType FileType) { - a.fileLocks[fType].Lock() - defer a.fileLocks[fType].Unlock() - a.files[fType].Ascend(func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if item.decompressor != nil { - item.decompressor.Close() - } - if item.index != nil { - item.index.Close() - } - return true - }) -} - -func (a *Aggregator) Close() { - close(a.aggChannel) - a.aggWg.Wait() // Need to wait for the background aggregation to finish because it sends to merge channels - // Drain channel before closing - select { - case <-a.mergeChannel: - default: - } - close(a.mergeChannel) - if a.changesets { - // Drain channel before closing - select { - case <-a.historyChannel: - default: - } - close(a.historyChannel) - a.historyWg.Wait() - } - a.mergeWg.Wait() - // Closing state files only after background aggregation goroutine is finished - for fType := FirstType; fType < NumberOfTypes; fType++ { - a.closeFiles(fType) - } -} - -// checkOverlapWithMinStart does not need to lock tree lock, because it is only used in the constructor of Aggregator -func checkOverlapWithMinStart(treeName string, tree *btree.BTree, minStart uint64) error { - if lastStateI := tree.Max(); lastStateI != nil { - item := lastStateI.(*byEndBlockItem) - if minStart != math.MaxUint64 && item.endBlock+1 != minStart { - return fmt.Errorf("hole or overlap between %s state files and change files [%d-%d]", treeName, item.endBlock, minStart) - } - } - return nil -} - -func (a *Aggregator) readFromFiles(fType FileType, lock bool, blockNum uint64, filekey []byte, trace bool) ([]byte, uint64) { - if lock { - if fType == Commitment { - for lockFType := FirstType; lockFType < NumberOfStateTypes; lockFType++ { - a.fileLocks[lockFType].RLock() - defer a.fileLocks[lockFType].RUnlock() - } - } else { - a.fileLocks[fType].RLock() - defer a.fileLocks[fType].RUnlock() - } - } - h := a.archHasher - arch := a.arches[fType] - n := uint64(len(arch)) - if n > 0 { - h.Reset() - h.Write(filekey) //nolint:errcheck - p, _ := h.Sum128() - p = p % n - v := uint64(atomic.LoadUint32(&arch[p])) - //fmt.Printf("Reading from %s arch key [%x]=%d, %d\n", fType.String(), filekey, p, arch[p]) - if v == 0 { - return nil, 0 - } - a.files[fType].AscendGreaterOrEqual(&byEndBlockItem{startBlock: v, endBlock: v}, func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if item.endBlock < blockNum { - blockNum = item.endBlock - } - return false - }) - } - var val []byte - var startBlock uint64 - a.files[fType].DescendLessOrEqual(&byEndBlockItem{endBlock: blockNum}, func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if trace { - fmt.Printf("read %s %x: search in file [%d-%d]\n", fType.String(), filekey, item.startBlock, item.endBlock) - } - if item.tree != nil { - ai, ok := item.tree.Get(&AggregateItem{k: filekey}) - if !ok { - return true - } - if ai == nil { - return true - } - val = ai.v - startBlock = item.startBlock - - return false - } - if item.index.Empty() { - return true - } - offset := item.indexReader.Lookup(filekey) - g := item.getter - g.Reset(offset) - if g.HasNext() { - if keyMatch, _ := g.Match(filekey); keyMatch { - val, 
_ = g.Next(nil) - if trace { - fmt.Printf("read %s %x: found [%x] in file [%d-%d]\n", fType.String(), filekey, val, item.startBlock, item.endBlock) - } - startBlock = item.startBlock - atomic.AddUint64(&a.fileHits, 1) - return false - } - } - atomic.AddUint64(&a.fileMisses, 1) - return true - }) - - if fType == Commitment { - // Transform references - if len(val) > 0 { - accountPlainKeys, storagePlainKeys, err := commitment.BranchData(val).ExtractPlainKeys() - if err != nil { - panic(fmt.Errorf("value %x: %w", val, err)) - } - var transAccountPks [][]byte - var transStoragePks [][]byte - for _, accountPlainKey := range accountPlainKeys { - var apkBuf []byte - if len(accountPlainKey) == length.Addr { - // Non-optimised key originating from a database record - apkBuf = accountPlainKey - } else { - // Optimised key referencing a state file record (file number and offset within the file) - fileI := int(accountPlainKey[0]) - offset := decodeU64(accountPlainKey[1:]) - apkBuf, _ = a.readByOffset(Account, fileI, offset) - } - transAccountPks = append(transAccountPks, apkBuf) - } - for _, storagePlainKey := range storagePlainKeys { - var spkBuf []byte - if len(storagePlainKey) == length.Addr+length.Hash { - // Non-optimised key originating from a database record - spkBuf = storagePlainKey - } else { - // Optimised key referencing a state file record (file number and offset within the file) - fileI := int(storagePlainKey[0]) - offset := decodeU64(storagePlainKey[1:]) - //fmt.Printf("readbyOffset(comm file %d-%d) file=%d offset=%d\n", ii.startBlock, ii.endBlock, fileI, offset) - spkBuf, _ = a.readByOffset(Storage, fileI, offset) - } - transStoragePks = append(transStoragePks, spkBuf) - } - if val, err = commitment.BranchData(val).ReplacePlainKeys(transAccountPks, transStoragePks, nil); err != nil { - panic(err) - } - } - } - return val, startBlock -} - -// readByOffset is assumed to be invoked under a read lock -func (a *Aggregator) readByOffset(fType FileType, fileI int, offset uint64) ([]byte, []byte) { - var key, val []byte - fi := 0 - a.files[fType].Ascend(func(i btree.Item) bool { - if fi < fileI { - fi++ - return true - } - item := i.(*byEndBlockItem) - //fmt.Printf("fileI=%d, file=%s.%d-%d\n", fileI, fType.String(), item.startBlock, item.endBlock) - g := item.getter - g.Reset(offset) - key, _ = g.Next(nil) - val, _ = g.Next(nil) - - return false - }) - return key, val -} - -func (a *Aggregator) MakeStateReader(blockNum uint64, tx kv.Tx) *Reader { - r := &Reader{ - a: a, - blockNum: blockNum, - tx: tx, - } - return r -} - -type Reader struct { - a *Aggregator - tx kv.Getter - blockNum uint64 -} - -func (r *Reader) ReadAccountData(addr []byte, trace bool) ([]byte, error) { - v, err := r.tx.GetOne(kv.StateAccounts, addr) - if err != nil { - return nil, err - } - if v != nil { - return v[4:], nil - } - v, _ = r.a.readFromFiles(Account, true /* lock */, r.blockNum, addr, trace) - return v, nil -} - -func (r *Reader) ReadAccountStorage(addr []byte, loc []byte, trace bool) ([]byte, error) { - // Look in the summary table first - dbkey := make([]byte, len(addr)+len(loc)) - copy(dbkey[0:], addr) - copy(dbkey[len(addr):], loc) - v, err := r.tx.GetOne(kv.StateStorage, dbkey) - if err != nil { - return nil, err - } - if v != nil { - if len(v) == 4 { - return nil, nil - } - return v[4:], nil - } - v, _ = r.a.readFromFiles(Storage, true /* lock */, r.blockNum, dbkey, trace) - return v, nil -} - -func (r *Reader) ReadAccountCode(addr []byte, trace bool) ([]byte, error) { - // Look in the summary table first 
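	// Values in the summary tables carry a 4-byte big-endian change counter in front of the
	// payload, which is why the readers here return v[4:] and treat len(v) == 4 as present but
	// empty (the counter itself is maintained in aggregate and computeCommitment).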
- v, err := r.tx.GetOne(kv.StateCode, addr) - if err != nil { - return nil, err - } - if v != nil { - if len(v) == 4 { - return nil, nil - } - return v[4:], nil - } - // Look in the files - v, _ = r.a.readFromFiles(Code, true /* lock */, r.blockNum, addr, trace) - return v, nil -} - -func (r *Reader) ReadAccountCodeSize(addr []byte, trace bool) (int, error) { - // Look in the summary table first - v, err := r.tx.GetOne(kv.StateCode, addr) - if err != nil { - return 0, err - } - if v != nil { - return len(v) - 4, nil - } - // Look in the files. TODO - use specialised function to only lookup size - v, _ = r.a.readFromFiles(Code, true /* lock */, r.blockNum, addr, trace) - return len(v), nil -} - -type Writer struct { - tx kv.RwTx - a *Aggregator - commTree *btree.BTreeG[*CommitmentItem] // BTree used for gathering commitment data - changes [NumberOfStateTypes]Changes - blockNum uint64 - changeFileNum uint64 // Block number associated with the current change files. It is the last block number whose changes will go into that file -} - -func (a *Aggregator) MakeStateWriter(beforeOn bool) *Writer { - w := &Writer{ - a: a, - commTree: btree.NewG[*CommitmentItem](32, commitmentItemLess), - } - for fType := FirstType; fType < NumberOfStateTypes; fType++ { - w.changes[fType].Init(fType.String(), a.aggregationStep, a.diffDir, w.a.changesets && fType != Commitment /* we do not unwind commitment ? */) - } - return w -} - -func (w *Writer) Close() { - typesLimit := Commitment - if w.a.commitments { - typesLimit = AccountHistory - } - for fType := FirstType; fType < typesLimit; fType++ { - w.changes[fType].closeFiles() - } -} - -func (w *Writer) Reset(blockNum uint64, tx kv.RwTx) error { - w.tx = tx - w.blockNum = blockNum - typesLimit := Commitment - if w.a.commitments { - typesLimit = AccountHistory - } - if blockNum > w.changeFileNum { - for fType := FirstType; fType < typesLimit; fType++ { - if err := w.changes[fType].closeFiles(); err != nil { - return err - } - } - if w.changeFileNum != 0 { - w.a.changesBtree.ReplaceOrInsert(&ChangesItem{startBlock: w.changeFileNum + 1 - w.a.aggregationStep, endBlock: w.changeFileNum, fileCount: 12}) - } - } - if w.changeFileNum == 0 || blockNum > w.changeFileNum { - for fType := FirstType; fType < typesLimit; fType++ { - if err := w.changes[fType].openFiles(blockNum, true /* write */); err != nil { - return err - } - } - w.changeFileNum = blockNum - (blockNum % w.a.aggregationStep) + w.a.aggregationStep - 1 - } - return nil -} - -type CommitmentItem struct { - plainKey []byte - hashedKey []byte - u commitment.Update -} - -func commitmentItemLess(i, j *CommitmentItem) bool { - return bytes.Compare(i.hashedKey, j.hashedKey) < 0 -} -func (i *CommitmentItem) Less(than btree.Item) bool { - return bytes.Compare(i.hashedKey, than.(*CommitmentItem).hashedKey) < 0 -} - -func (w *Writer) branchFn(prefix []byte) ([]byte, error) { - for lockFType := FirstType; lockFType < NumberOfStateTypes; lockFType++ { - w.a.fileLocks[lockFType].RLock() - defer w.a.fileLocks[lockFType].RUnlock() - } - // Look in the summary table first - mergedVal, err := w.tx.GetOne(kv.StateCommitment, prefix) - if err != nil { - return nil, err - } - if mergedVal != nil { - mergedVal = mergedVal[4:] - } - // Look in the files and merge, while it becomes complete - var startBlock = w.blockNum + 1 - for mergedVal == nil || !commitment.BranchData(mergedVal).IsComplete() { - if startBlock == 0 { - panic(fmt.Sprintf("Incomplete branch data prefix [%x], mergeVal=[%x], startBlock=%d\n", 
commitment.CompactedKeyToHex(prefix), mergedVal, startBlock)) - } - var val commitment.BranchData - val, startBlock = w.a.readFromFiles(Commitment, false /* lock */, startBlock-1, prefix, false /* trace */) - if val == nil { - if mergedVal == nil { - return nil, nil - } - panic(fmt.Sprintf("Incomplete branch data prefix [%x], mergeVal=[%x], startBlock=%d\n", commitment.CompactedKeyToHex(prefix), mergedVal, startBlock)) - } - var err error - //fmt.Printf("Pre-merge prefix [%x] [%x]+[%x], startBlock %d\n", commitment.CompactToHex(prefix), val, mergedVal, startBlock) - if mergedVal == nil { - mergedVal = val - } else if mergedVal, err = val.MergeHexBranches(mergedVal, nil); err != nil { - return nil, err - } - //fmt.Printf("Post-merge prefix [%x] [%x], startBlock %d\n", commitment.CompactToHex(prefix), mergedVal, startBlock) - } - if mergedVal == nil { - return nil, nil - } - //fmt.Printf("Returning branch data prefix [%x], mergeVal=[%x], startBlock=%d\n", commitment.CompactToHex(prefix), mergedVal, startBlock) - return mergedVal[2:], nil // Skip touchMap but keep afterMap -} - -func bytesToUint64(buf []byte) (x uint64) { - for i, b := range buf { - x = x<<8 + uint64(b) - if i == 7 { - return - } - } - return -} - -func (w *Writer) accountFn(plainKey []byte, cell *commitment.Cell) error { - // Look in the summary table first - enc, err := w.tx.GetOne(kv.StateAccounts, plainKey) - if err != nil { - return err - } - if enc != nil { - enc = enc[4:] - } else { - // Look in the files - enc, _ = w.a.readFromFiles(Account, true /* lock */, w.blockNum, plainKey, false /* trace */) - } - cell.Nonce = 0 - cell.Balance.Clear() - copy(cell.CodeHash[:], commitment.EmptyCodeHash) - - if len(enc) > 0 { - pos := 0 - nonceBytes := int(enc[pos]) - pos++ - if nonceBytes > 0 { - cell.Nonce = bytesToUint64(enc[pos : pos+nonceBytes]) - pos += nonceBytes - } - balanceBytes := int(enc[pos]) - pos++ - if balanceBytes > 0 { - cell.Balance.SetBytes(enc[pos : pos+balanceBytes]) - } - } - enc, err = w.tx.GetOne(kv.StateCode, plainKey) - if err != nil { - return err - } - if enc != nil { - enc = enc[4:] - } else { - // Look in the files - enc, _ = w.a.readFromFiles(Code, true /* lock */, w.blockNum, plainKey, false /* trace */) - } - if len(enc) > 0 { - w.a.keccak.Reset() - w.a.keccak.Write(enc) - w.a.keccak.(io.Reader).Read(cell.CodeHash[:]) - } - return nil -} - -func (w *Writer) storageFn(plainKey []byte, cell *commitment.Cell) error { - // Look in the summary table first - enc, err := w.tx.GetOne(kv.StateStorage, plainKey) - if err != nil { - return err - } - if enc != nil { - enc = enc[4:] - } else { - // Look in the files - enc, _ = w.a.readFromFiles(Storage, true /* lock */, w.blockNum, plainKey, false /* trace */) - } - cell.StorageLen = len(enc) - copy(cell.Storage[:], enc) - return nil -} - -func (w *Writer) captureCommitmentType(fType FileType, trace bool, f func(commTree *btree.BTreeG[*CommitmentItem], h hash.Hash, key, val []byte)) { - lastOffsetKey := 0 - lastOffsetVal := 0 - for i, offsetKey := range w.changes[fType].keys.wordOffsets { - offsetVal := w.changes[fType].after.wordOffsets[i] - key := w.changes[fType].keys.words[lastOffsetKey:offsetKey] - val := w.changes[fType].after.words[lastOffsetVal:offsetVal] - if trace { - fmt.Printf("captureCommitmentData %s [%x]=>[%x]\n", fType.String(), key, val) - } - f(w.commTree, w.a.keccak, key, val) - lastOffsetKey = offsetKey - lastOffsetVal = offsetVal - } -} - -func (w *Writer) captureCommitmentData(trace bool) { - if trace { - fmt.Printf("captureCommitmentData 
start w.commTree.Len()=%d\n", w.commTree.Len()) - } - w.captureCommitmentType(Code, trace, func(commTree *btree.BTreeG[*CommitmentItem], h hash.Hash, key, val []byte) { - h.Reset() - h.Write(key) - hashedKey := h.Sum(nil) - var c = &CommitmentItem{plainKey: common.Copy(key), hashedKey: make([]byte, len(hashedKey)*2)} - for i, b := range hashedKey { - c.hashedKey[i*2] = (b >> 4) & 0xf - c.hashedKey[i*2+1] = b & 0xf - } - c.u.Flags = commitment.CODE_UPDATE - item, found := commTree.Get(&CommitmentItem{hashedKey: c.hashedKey}) - if found && item != nil { - if item.u.Flags&commitment.BALANCE_UPDATE != 0 { - c.u.Flags |= commitment.BALANCE_UPDATE - c.u.Balance.Set(&item.u.Balance) - } - if item.u.Flags&commitment.NONCE_UPDATE != 0 { - c.u.Flags |= commitment.NONCE_UPDATE - c.u.Nonce = item.u.Nonce - } - if item.u.Flags == commitment.DELETE_UPDATE && len(val) == 0 { - c.u.Flags = commitment.DELETE_UPDATE - } else { - h.Reset() - h.Write(val) - h.(io.Reader).Read(c.u.CodeHashOrStorage[:]) - } - } else { - h.Reset() - h.Write(val) - h.(io.Reader).Read(c.u.CodeHashOrStorage[:]) - } - commTree.ReplaceOrInsert(c) - }) - w.captureCommitmentType(Account, trace, func(commTree *btree.BTreeG[*CommitmentItem], h hash.Hash, key, val []byte) { - h.Reset() - h.Write(key) - hashedKey := h.Sum(nil) - var c = &CommitmentItem{plainKey: common.Copy(key), hashedKey: make([]byte, len(hashedKey)*2)} - for i, b := range hashedKey { - c.hashedKey[i*2] = (b >> 4) & 0xf - c.hashedKey[i*2+1] = b & 0xf - } - if len(val) == 0 { - c.u.Flags = commitment.DELETE_UPDATE - } else { - c.u.DecodeForStorage(val) - c.u.Flags = commitment.BALANCE_UPDATE | commitment.NONCE_UPDATE - item, found := commTree.Get(&CommitmentItem{hashedKey: c.hashedKey}) - - if found && item != nil { - if item.u.Flags&commitment.CODE_UPDATE != 0 { - c.u.Flags |= commitment.CODE_UPDATE - copy(c.u.CodeHashOrStorage[:], item.u.CodeHashOrStorage[:]) - } - } - } - commTree.ReplaceOrInsert(c) - }) - w.captureCommitmentType(Storage, trace, func(commTree *btree.BTreeG[*CommitmentItem], h hash.Hash, key, val []byte) { - hashedKey := make([]byte, 2*length.Hash) - h.Reset() - h.Write(key[:length.Addr]) - h.(io.Reader).Read(hashedKey[:length.Hash]) - h.Reset() - h.Write(key[length.Addr:]) - h.(io.Reader).Read(hashedKey[length.Hash:]) - var c = &CommitmentItem{plainKey: common.Copy(key), hashedKey: make([]byte, len(hashedKey)*2)} - for i, b := range hashedKey { - c.hashedKey[i*2] = (b >> 4) & 0xf - c.hashedKey[i*2+1] = b & 0xf - } - c.u.ValLength = len(val) - if len(val) > 0 { - copy(c.u.CodeHashOrStorage[:], val) - } - if len(val) == 0 { - c.u.Flags = commitment.DELETE_UPDATE - } else { - c.u.Flags = commitment.STORAGE_UPDATE - } - commTree.ReplaceOrInsert(c) - }) - if trace { - fmt.Printf("captureCommitmentData end w.commTree.Len()=%d\n", w.commTree.Len()) - } -} - -// computeCommitment is computing the commitment to the state after -// the change would have been applied. 
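The three capture callbacks above all expand the Keccak-hashed key into half-byte nibbles before inserting the update into the commitment tree. A minimal, self-contained sketch of that expansion follows; the helper name and the main function are illustrative only and are not part of the patch.

package main

import "fmt"

// hashedKeyToNibbles splits every byte of a hashed key into its high and
// low 4-bit halves, doubling the key length, as the capture callbacks do.
func hashedKeyToNibbles(hashed []byte) []byte {
	nibblized := make([]byte, len(hashed)*2)
	for i, b := range hashed {
		nibblized[i*2] = (b >> 4) & 0xf
		nibblized[i*2+1] = b & 0xf
	}
	return nibblized
}

func main() {
	fmt.Printf("%x\n", hashedKeyToNibbles([]byte{0xab, 0xcd})) // prints 0a0b0c0d
}
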
-// It assumes that the state accessible via the aggregator has already been -// modified with the new values -// At the moment, it is specific version for hex merkle patricia tree commitment -// but it will be extended to support other types of commitments -func (w *Writer) computeCommitment(trace bool) ([]byte, error) { - if trace { - fmt.Printf("computeCommitment w.commTree.Len()=%d\n", w.commTree.Len()) - } - - plainKeys := make([][]byte, w.commTree.Len()) - hashedKeys := make([][]byte, w.commTree.Len()) - updates := make([]commitment.Update, w.commTree.Len()) - j := 0 - w.commTree.Ascend(func(item *CommitmentItem) bool { - plainKeys[j] = item.plainKey - hashedKeys[j] = item.hashedKey - updates[j] = item.u - j++ - return true - }) - - if len(plainKeys) == 0 { - return w.a.hph.RootHash() - } - - w.a.hph.Reset() - w.a.hph.ResetFns(w.branchFn, w.accountFn, w.storageFn) - w.a.hph.SetTrace(trace) - - rootHash, branchNodeUpdates, err := w.a.hph.ProcessUpdates(plainKeys, hashedKeys, updates) - if err != nil { - return nil, err - } - - for prefixStr, branchNodeUpdate := range branchNodeUpdates { - if branchNodeUpdate == nil { - continue - } - prefix := []byte(prefixStr) - var prevV []byte - var prevNum uint32 - if prevV, err = w.tx.GetOne(kv.StateCommitment, prefix); err != nil { - return nil, err - } - if prevV != nil { - prevNum = binary.BigEndian.Uint32(prevV[:4]) - } - - var original commitment.BranchData - if prevV == nil { - original, _ = w.a.readFromFiles(Commitment, true /* lock */, w.blockNum, prefix, false) - } else { - original = prevV[4:] - } - if original != nil { - // try to merge previous (original) and current (branchNodeUpdate) into one update - mergedVal, err := original.MergeHexBranches(branchNodeUpdate, nil) - if err != nil { - return nil, err - } - if w.a.trace { - fmt.Printf("computeCommitment merge [%x] [%x]+[%x]=>[%x]\n", commitment.CompactedKeyToHex(prefix), original, branchNodeUpdate, mergedVal) - } - branchNodeUpdate = mergedVal - } - - //fmt.Printf("computeCommitment set [%x] [%x]\n", commitment.CompactToHex(prefix), branchNodeUpdate) - v := make([]byte, 4+len(branchNodeUpdate)) - binary.BigEndian.PutUint32(v[:4], prevNum+1) - copy(v[4:], branchNodeUpdate) - - if err = w.tx.Put(kv.StateCommitment, prefix, v); err != nil { - return nil, err - } - if len(branchNodeUpdate) == 0 { - w.changes[Commitment].delete(prefix, original) - } else { - if prevV == nil && len(original) == 0 { - w.changes[Commitment].insert(prefix, branchNodeUpdate) - } else { - w.changes[Commitment].update(prefix, original, branchNodeUpdate) - } - } - } - - return rootHash, nil -} - -func (w *Writer) FinishTx(txNum uint64, trace bool) error { - if w.a.commitments { - w.captureCommitmentData(trace) - } - var err error - for fType := FirstType; fType < Commitment; fType++ { - if err = w.changes[fType].finish(txNum); err != nil { - return fmt.Errorf("finish %sChanges: %w", fType.String(), err) - } - } - return nil -} - -func (w *Writer) ComputeCommitment(trace bool) ([]byte, error) { - if !w.a.commitments { - return nil, fmt.Errorf("commitments turned off") - } - comm, err := w.computeCommitment(trace) - if err != nil { - return nil, fmt.Errorf("compute commitment: %w", err) - } - w.commTree.Clear(true) - if err = w.changes[Commitment].finish(w.blockNum); err != nil { - return nil, fmt.Errorf("finish commChanges: %w", err) - } - return comm, nil -} - -// Aggegate should be called to check if the aggregation is required, and -// if it is required, perform it -func (w *Writer) Aggregate(trace bool) 
error { - if w.blockNum < w.a.unwindLimit+w.a.aggregationStep-1 { - return nil - } - diff := w.blockNum - w.a.unwindLimit - if (diff+1)%w.a.aggregationStep != 0 { - return nil - } - if err := w.aggregateUpto(diff+1-w.a.aggregationStep, diff); err != nil { - return fmt.Errorf("aggregateUpto(%d, %d): %w", diff+1-w.a.aggregationStep, diff, err) - } - return nil -} - -func (w *Writer) UpdateAccountData(addr []byte, account []byte, trace bool) error { - var prevNum uint32 - prevV, err := w.tx.GetOne(kv.StateAccounts, addr) - if err != nil { - return err - } - if prevV != nil { - prevNum = binary.BigEndian.Uint32(prevV[:4]) - } - var original []byte - if prevV == nil { - original, _ = w.a.readFromFiles(Account, true /* lock */, w.blockNum, addr, trace) - } else { - original = prevV[4:] - } - if bytes.Equal(account, original) { - // No change - return nil - } - v := make([]byte, 4+len(account)) - binary.BigEndian.PutUint32(v[:4], prevNum+1) - copy(v[4:], account) - if err = w.tx.Put(kv.StateAccounts, addr, v); err != nil { - return err - } - if prevV == nil && len(original) == 0 { - w.changes[Account].insert(addr, account) - } else { - w.changes[Account].update(addr, original, account) - } - if trace { - w.a.trace = true - w.a.tracedKeys[string(addr)] = struct{}{} - } - return nil -} - -func (w *Writer) UpdateAccountCode(addr []byte, code []byte, trace bool) error { - var prevNum uint32 - prevV, err := w.tx.GetOne(kv.StateCode, addr) - if err != nil { - return err - } - if prevV != nil { - prevNum = binary.BigEndian.Uint32(prevV[:4]) - } - var original []byte - if prevV == nil { - original, _ = w.a.readFromFiles(Code, true /* lock */, w.blockNum, addr, trace) - } else { - original = prevV[4:] - } - v := make([]byte, 4+len(code)) - binary.BigEndian.PutUint32(v[:4], prevNum+1) - copy(v[4:], code) - if err = w.tx.Put(kv.StateCode, addr, v); err != nil { - return err - } - if prevV == nil && len(original) == 0 { - w.changes[Code].insert(addr, code) - } else { - w.changes[Code].update(addr, original, code) - } - if trace { - w.a.trace = true - w.a.tracedKeys[string(addr)] = struct{}{} - } - return nil -} - -type CursorType uint8 - -const ( - FILE_CURSOR CursorType = iota - DB_CURSOR - TREE_CURSOR -) - -// CursorItem is the item in the priority queue used to do merge interation -// over storage of a given account -type CursorItem struct { - c kv.Cursor - dg *compress.Getter - tree *btree.BTreeG[*AggregateItem] - key []byte - val []byte - endBlock uint64 - t CursorType // Whether this item represents state file or DB record, or tree -} - -type CursorHeap []*CursorItem - -func (ch CursorHeap) Len() int { - return len(ch) -} - -func (ch CursorHeap) Less(i, j int) bool { - cmp := bytes.Compare(ch[i].key, ch[j].key) - if cmp == 0 { - // when keys match, the items with later blocks are preferred - return ch[i].endBlock > ch[j].endBlock - } - return cmp < 0 -} - -func (ch *CursorHeap) Swap(i, j int) { - (*ch)[i], (*ch)[j] = (*ch)[j], (*ch)[i] -} - -func (ch *CursorHeap) Push(x interface{}) { - *ch = append(*ch, x.(*CursorItem)) -} - -func (ch *CursorHeap) Pop() interface{} { - old := *ch - n := len(old) - x := old[n-1] - old[n-1] = nil - *ch = old[0 : n-1] - return x -} - -func (w *Writer) deleteAccount(addr []byte, trace bool) (bool, error) { - prevV, err := w.tx.GetOne(kv.StateAccounts, addr) - if err != nil { - return false, err - } - var prevNum uint32 - if prevV != nil { - prevNum = binary.BigEndian.Uint32(prevV[:4]) - } - var original []byte - if prevV == nil { - original, _ = w.a.readFromFiles(Account, 
true /* lock */, w.blockNum, addr, trace) - if original == nil { - return false, nil - } - } else { - original = prevV[4:] - } - v := make([]byte, 4) - binary.BigEndian.PutUint32(v[:4], prevNum+1) - if err = w.tx.Put(kv.StateAccounts, addr, v); err != nil { - return false, err - } - w.changes[Account].delete(addr, original) - return true, nil -} - -func (w *Writer) deleteCode(addr []byte, trace bool) error { - prevV, err := w.tx.GetOne(kv.StateCode, addr) - if err != nil { - return err - } - var prevNum uint32 - if prevV != nil { - prevNum = binary.BigEndian.Uint32(prevV[:4]) - } - var original []byte - if prevV == nil { - original, _ = w.a.readFromFiles(Code, true /* lock */, w.blockNum, addr, trace) - if original == nil { - // Nothing to do - return nil - } - } else { - original = prevV[4:] - } - v := make([]byte, 4) - binary.BigEndian.PutUint32(v[:4], prevNum+1) - if err = w.tx.Put(kv.StateCode, addr, v); err != nil { - return err - } - w.changes[Code].delete(addr, original) - return nil -} - -func (w *Writer) DeleteAccount(addr []byte, trace bool) error { - deleted, err := w.deleteAccount(addr, trace) - if err != nil { - return err - } - if !deleted { - return nil - } - w.a.fileLocks[Storage].RLock() - defer w.a.fileLocks[Storage].RUnlock() - w.deleteCode(addr, trace) - // Find all storage items for this address - var cp CursorHeap - heap.Init(&cp) - var c kv.Cursor - if c, err = w.tx.Cursor(kv.StateStorage); err != nil { - return err - } - defer c.Close() - var k, v []byte - if k, v, err = c.Seek(addr); err != nil { - return err - } - if k != nil && bytes.HasPrefix(k, addr) { - heap.Push(&cp, &CursorItem{t: DB_CURSOR, key: common.Copy(k), val: common.Copy(v), c: c, endBlock: w.blockNum}) - } - w.a.files[Storage].Ascend(func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if item.tree != nil { - item.tree.AscendGreaterOrEqual(&AggregateItem{k: addr}, func(aitem *AggregateItem) bool { - if !bytes.HasPrefix(aitem.k, addr) { - return false - } - if len(aitem.k) == len(addr) { - return true - } - heap.Push(&cp, &CursorItem{t: TREE_CURSOR, key: aitem.k, val: aitem.v, tree: item.tree, endBlock: item.endBlock}) - return false - }) - return true - } - if item.index.Empty() { - return true - } - offset := item.indexReader.Lookup(addr) - g := item.getter - g.Reset(offset) - if g.HasNext() { - if keyMatch, _ := g.Match(addr); !keyMatch { - //fmt.Printf("DeleteAccount %x - not found anchor in file [%d-%d]\n", addr, item.startBlock, item.endBlock) - return true - } - g.Skip() - } - if g.HasNext() { - key, _ := g.Next(nil) - if bytes.HasPrefix(key, addr) { - val, _ := g.Next(nil) - heap.Push(&cp, &CursorItem{t: FILE_CURSOR, key: key, val: val, dg: g, endBlock: item.endBlock}) - } - } - return true - }) - for cp.Len() > 0 { - lastKey := common.Copy(cp[0].key) - lastVal := common.Copy(cp[0].val) - // Advance all the items that have this key (including the top) - for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) { - ci1 := cp[0] - switch ci1.t { - case FILE_CURSOR: - if ci1.dg.HasNext() { - ci1.key, _ = ci1.dg.Next(ci1.key[:0]) - if bytes.HasPrefix(ci1.key, addr) { - ci1.val, _ = ci1.dg.Next(ci1.val[:0]) - heap.Fix(&cp, 0) - } else { - heap.Pop(&cp) - } - } else { - heap.Pop(&cp) - } - case DB_CURSOR: - k, v, err = ci1.c.Next() - if err != nil { - return err - } - if k != nil && bytes.HasPrefix(k, addr) { - ci1.key = common.Copy(k) - ci1.val = common.Copy(v) - heap.Fix(&cp, 0) - } else { - heap.Pop(&cp) - } - case TREE_CURSOR: - skip := true - var aitem *AggregateItem - 
ci1.tree.AscendGreaterOrEqual(&AggregateItem{k: ci1.key}, func(ai *AggregateItem) bool { - if skip { - skip = false - return true - } - aitem = ai - return false - }) - if aitem != nil && bytes.HasPrefix(aitem.k, addr) { - ci1.key = aitem.k - ci1.val = aitem.v - heap.Fix(&cp, 0) - } else { - heap.Pop(&cp) - } - } - } - var prevV []byte - prevV, err = w.tx.GetOne(kv.StateStorage, lastKey) - if err != nil { - return err - } - var prevNum uint32 - if prevV != nil { - prevNum = binary.BigEndian.Uint32(prevV[:4]) - } - v = make([]byte, 4) - binary.BigEndian.PutUint32(v[:4], prevNum+1) - if err = w.tx.Put(kv.StateStorage, lastKey, v); err != nil { - return err - } - w.changes[Storage].delete(lastKey, lastVal) - } - if trace { - w.a.trace = true - w.a.tracedKeys[string(addr)] = struct{}{} - } - return nil -} - -func (w *Writer) WriteAccountStorage(addr, loc []byte, value []byte, trace bool) error { - dbkey := make([]byte, len(addr)+len(loc)) - copy(dbkey[0:], addr) - copy(dbkey[len(addr):], loc) - prevV, err := w.tx.GetOne(kv.StateStorage, dbkey) - if err != nil { - return err - } - var prevNum uint32 - if prevV != nil { - prevNum = binary.BigEndian.Uint32(prevV[:4]) - } - var original []byte - if prevV == nil { - original, _ = w.a.readFromFiles(Storage, true /* lock */, w.blockNum, dbkey, trace) - } else { - original = prevV[4:] - } - if bytes.Equal(value, original) { - // No change - return nil - } - v := make([]byte, 4+len(value)) - binary.BigEndian.PutUint32(v[:4], prevNum+1) - copy(v[4:], value) - if err = w.tx.Put(kv.StateStorage, dbkey, v); err != nil { - return err - } - if prevV == nil && len(original) == 0 { - w.changes[Storage].insert(dbkey, value) - } else { - w.changes[Storage].update(dbkey, original, value) - } - if trace { - w.a.trace = true - w.a.tracedKeys[string(dbkey)] = struct{}{} - } - return nil -} - -// findLargestMerge looks through the state files of the speficied type and determines the largest merge that can be undertaken -// a state file block [a; b] is valid if its length is a divisor of its starting block, or `(b-a+1) = 0 mod a` -func (a *Aggregator) findLargestMerge(fType FileType, maxTo uint64, maxSpan uint64) (toAggregate []*byEndBlockItem, pre []*byEndBlockItem, post []*byEndBlockItem, aggFrom uint64, aggTo uint64) { - a.fileLocks[fType].RLock() - defer a.fileLocks[fType].RUnlock() - var maxEndBlock uint64 - a.files[fType].DescendLessOrEqual(&byEndBlockItem{endBlock: maxTo}, func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if item.decompressor == nil { - return true - } - maxEndBlock = item.endBlock - return false - }) - if maxEndBlock == 0 { - return - } - a.files[fType].Ascend(func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if item.decompressor == nil { - return true // Skip B-tree based items - } - pre = append(pre, item) - if aggTo == 0 { - var doubleEnd uint64 - nextDouble := item.endBlock - for nextDouble <= maxEndBlock && nextDouble-item.startBlock < maxSpan { - doubleEnd = nextDouble - nextDouble = doubleEnd + (doubleEnd - item.startBlock) + 1 - } - if doubleEnd != item.endBlock { - aggFrom = item.startBlock - aggTo = doubleEnd - } else { - post = append(post, item) - return true - } - } - toAggregate = append(toAggregate, item) - return item.endBlock < aggTo - }) - return -} - -func (a *Aggregator) computeAggregation(fType FileType, - toAggregate []*byEndBlockItem, aggFrom uint64, aggTo uint64, - valTransform func(val, transValBuf commitment.BranchData) ([]byte, error), - mergeFunc commitmentMerger, - valCompressed bool, - withIndex 
bool, prefixLen int) (*byEndBlockItem, error) { - var item2 = &byEndBlockItem{startBlock: aggFrom, endBlock: aggTo} - var cp CursorHeap - heap.Init(&cp) - for _, ag := range toAggregate { - g := ag.decompressor.MakeGetter() - g.Reset(0) - if g.HasNext() { - key, _ := g.Next(nil) - val, _ := g.Next(nil) - heap.Push(&cp, &CursorItem{t: FILE_CURSOR, dg: g, key: key, val: val, endBlock: ag.endBlock}) - } - } - var err error - var count int - if item2.decompressor, count, err = a.mergeIntoStateFile(&cp, prefixLen, fType, aggFrom, aggTo, a.diffDir, valTransform, mergeFunc, valCompressed); err != nil { - return nil, fmt.Errorf("mergeIntoStateFile %s [%d-%d]: %w", fType.String(), aggFrom, aggTo, err) - } - item2.getter = item2.decompressor.MakeGetter() - item2.getterMerge = item2.decompressor.MakeGetter() - if withIndex { - idxPath := filepath.Join(a.diffDir, fmt.Sprintf("%s.%d-%d.idx", fType.String(), aggFrom, aggTo)) - if item2.index, err = buildIndex(item2.decompressor, idxPath, a.diffDir, count); err != nil { - return nil, fmt.Errorf("mergeIntoStateFile buildIndex %s [%d-%d]: %w", fType.String(), aggFrom, aggTo, err) - } - item2.indexReader = recsplit.NewIndexReader(item2.index) - item2.readerMerge = recsplit.NewIndexReader(item2.index) - } - return item2, nil -} - -func createDatAndIndex(treeName string, diffDir string, bt *btree.BTreeG[*AggregateItem], blockFrom uint64, blockTo uint64) (*compress.Decompressor, *recsplit.Index, error) { - datPath := filepath.Join(diffDir, fmt.Sprintf("%s.%d-%d.dat", treeName, blockFrom, blockTo)) - idxPath := filepath.Join(diffDir, fmt.Sprintf("%s.%d-%d.idx", treeName, blockFrom, blockTo)) - count, err := btreeToFile(bt, datPath, diffDir, false /* trace */, 1 /* workers */) - if err != nil { - return nil, nil, fmt.Errorf("createDatAndIndex %s build btree: %w", treeName, err) - } - var d *compress.Decompressor - if d, err = compress.NewDecompressor(datPath); err != nil { - return nil, nil, fmt.Errorf("createDatAndIndex %s decompressor: %w", treeName, err) - } - var index *recsplit.Index - if index, err = buildIndex(d, idxPath, diffDir, count); err != nil { - return nil, nil, fmt.Errorf("createDatAndIndex %s buildIndex: %w", treeName, err) - } - return d, index, nil -} - -func (a *Aggregator) addLocked(fType FileType, item *byEndBlockItem) { - a.fileLocks[fType].Lock() - defer a.fileLocks[fType].Unlock() - a.files[fType].ReplaceOrInsert(item) -} - -func (w *Writer) aggregateUpto(blockFrom, blockTo uint64) error { - // React on any previous error of aggregation or merge - select { - case err := <-w.a.aggError: - return err - case err := <-w.a.mergeError: - return err - case err := <-w.a.historyError: - return err - default: - } - typesLimit := Commitment - if w.a.commitments { - typesLimit = AccountHistory - } - t0 := time.Now() - t := time.Now() - i := w.a.changesBtree.Get(&ChangesItem{startBlock: blockFrom, endBlock: blockTo}) - if i == nil { - return fmt.Errorf("did not find change files for [%d-%d], w.a.changesBtree.Len() = %d", blockFrom, blockTo, w.a.changesBtree.Len()) - } - item := i.(*ChangesItem) - if item.startBlock != blockFrom { - return fmt.Errorf("expected change files[%d-%d], got [%d-%d]", blockFrom, blockTo, item.startBlock, item.endBlock) - } - w.a.changesBtree.Delete(i) - var aggTask AggregationTask - for fType := FirstType; fType < typesLimit; fType++ { - aggTask.changes[fType].Init(fType.String(), w.a.aggregationStep, w.a.diffDir, w.a.changesets && fType != Commitment) - } - var err error - for fType := FirstType; fType < typesLimit; fType++ 
{ - var prefixLen int - if fType == Storage { - prefixLen = length.Addr - } - - var commitMerger commitmentMerger - if fType == Commitment { - commitMerger = mergeCommitments - } - - if aggTask.bt[fType], err = aggTask.changes[fType].aggregate(blockFrom, blockTo, prefixLen, w.tx, fType.Table(), commitMerger); err != nil { - return fmt.Errorf("aggregate %sChanges: %w", fType.String(), err) - } - } - aggTask.blockFrom = blockFrom - aggTask.blockTo = blockTo - aggTime := time.Since(t) - t = time.Now() - // At this point, all the changes are gathered in 4 B-trees (accounts, code, storage and commitment) and removed from the database - // What follows can be done in the 1st background goroutine - for fType := FirstType; fType < typesLimit; fType++ { - if fType < NumberOfStateTypes { - w.a.updateArch(aggTask.bt[fType], fType, uint32(aggTask.blockTo)) - } - } - updateArchTime := time.Since(t) - t = time.Now() - for fType := FirstType; fType < typesLimit; fType++ { - w.a.addLocked(fType, &byEndBlockItem{startBlock: aggTask.blockFrom, endBlock: aggTask.blockTo, tree: aggTask.bt[fType]}) - } - switchTime := time.Since(t) - w.a.aggChannel <- &aggTask - handoverTime := time.Since(t0) - if handoverTime > time.Second { - log.Info("Long handover to background aggregation", "from", blockFrom, "to", blockTo, "composition", aggTime, "arch update", updateArchTime, "switch", switchTime) - } - return nil -} - -// mergeIntoStateFile assumes that all entries in the cp heap have type FILE_CURSOR -func (a *Aggregator) mergeIntoStateFile(cp *CursorHeap, prefixLen int, - fType FileType, startBlock, endBlock uint64, dir string, - valTransform func(val, transValBuf commitment.BranchData) ([]byte, error), - mergeFunc commitmentMerger, - valCompressed bool, -) (*compress.Decompressor, int, error) { - datPath := filepath.Join(dir, fmt.Sprintf("%s.%d-%d.dat", fType.String(), startBlock, endBlock)) - comp, err := compress.NewCompressor(context.Background(), AggregatorPrefix, datPath, dir, compress.MinPatternScore, 1, log.LvlDebug) - if err != nil { - return nil, 0, fmt.Errorf("compressor %s: %w", datPath, err) - } - defer comp.Close() - count := 0 - // In the loop below, the pair `keyBuf=>valBuf` is always 1 item behind `lastKey=>lastVal`. - // `lastKey` and `lastVal` are taken from the top of the multi-way merge (assisted by the CursorHeap cp), but not processed right away - // instead, the pair from the previous iteration is processed first - `keyBuf=>valBuf`. After that, `keyBuf` and `valBuf` are assigned - // to `lastKey` and `lastVal` correspondingly, and the next step of multi-way merge happens. 
Therefore, after the multi-way merge loop - // (when CursorHeap cp is empty), there is a need to process the last pair `keyBuf=>valBuf`, because it was one step behind - var keyBuf, valBuf, transValBuf []byte - for cp.Len() > 0 { - lastKey := common.Copy((*cp)[0].key) - lastVal := common.Copy((*cp)[0].val) - var mergedOnce bool - if a.trace { - if _, ok := a.tracedKeys[string(lastKey)]; ok { - fmt.Printf("looking at key %x val [%x] endBlock %d to merge into [%d-%d]\n", lastKey, lastVal, (*cp)[0].endBlock, startBlock, endBlock) - } - } - // Advance all the items that have this key (including the top) - for cp.Len() > 0 && bytes.Equal((*cp)[0].key, lastKey) { - ci1 := (*cp)[0] - if a.trace { - if _, ok := a.tracedKeys[string(ci1.key)]; ok { - fmt.Printf("skipping same key %x val [%x] endBlock %d to merge into [%d-%d]\n", ci1.key, ci1.val, ci1.endBlock, startBlock, endBlock) - } - } - if ci1.t != FILE_CURSOR { - return nil, 0, fmt.Errorf("mergeIntoStateFile: cursor of unexpected type: %d", ci1.t) - } - if mergedOnce { - //fmt.Printf("mergeIntoStateFile pre-merge prefix [%x], [%x]+[%x]\n", commitment.CompactToHex(lastKey), ci1.val, lastVal) - if lastVal, err = mergeFunc(ci1.val, lastVal, nil); err != nil { - return nil, 0, fmt.Errorf("mergeIntoStateFile: merge values: %w", err) - } - //fmt.Printf("mergeIntoStateFile post-merge prefix [%x], [%x]\n", commitment.CompactToHex(lastKey), lastVal) - } else { - mergedOnce = true - } - if ci1.dg.HasNext() { - ci1.key, _ = ci1.dg.Next(ci1.key[:0]) - if valCompressed { - ci1.val, _ = ci1.dg.Next(ci1.val[:0]) - } else { - ci1.val, _ = ci1.dg.NextUncompressed() - } - - heap.Fix(cp, 0) - } else { - heap.Pop(cp) - } - } - var skip bool - switch fType { - case Storage: - // Inside storage files, there is a special item with empty value, and the key equal to the contract's address - // This special item is inserted before the contract storage items, in order to find them using un-ordered index - // (for the purposes of SELF-DESTRUCT and some RPC methods that require enumeration of contract storage) - // We will only skip this special item if there are no more corresponding storage items left - // (this is checked further down with `bytes.HasPrefix(lastKey, keyBuf)`) - skip = startBlock == 0 && len(lastVal) == 0 && len(lastKey) != prefixLen - case Commitment: - // For commitments, the 3rd and 4th bytes of the value (zero-based 2 and 3) contain so-called `afterMap` - // Its bit are set for children that are present in the tree, and unset for those that are not (deleted, for example) - // If all bits are zero (check below), this branch can be skipped, since it is empty - skip = startBlock == 0 && len(lastVal) >= 4 && lastVal[2] == 0 && lastVal[3] == 0 - case AccountHistory, StorageHistory, CodeHistory: - skip = false - default: - // For the rest of types, empty value means deletion - skip = startBlock == 0 && len(lastVal) == 0 - } - if skip { // Deleted marker can be skipped if we merge into the first file, except for the storage addr marker - if _, ok := a.tracedKeys[string(keyBuf)]; ok { - fmt.Printf("skipped key %x for [%d-%d]\n", keyBuf, startBlock, endBlock) - } - } else { - // The check `bytes.HasPrefix(lastKey, keyBuf)` is checking whether the `lastKey` is the first item - // of some contract's storage, and `keyBuf` (the item just before that) is the special item with the - // key being contract's address. 
If so, the special item (keyBuf => []) needs to be preserved - if keyBuf != nil && (prefixLen == 0 || len(keyBuf) != prefixLen || bytes.HasPrefix(lastKey, keyBuf)) { - if err = comp.AddWord(keyBuf); err != nil { - return nil, 0, err - } - if a.trace { - if _, ok := a.tracedKeys[string(keyBuf)]; ok { - fmt.Printf("merge key %x val [%x] into [%d-%d]\n", keyBuf, valBuf, startBlock, endBlock) - } - } - count++ // Only counting keys, not values - if valTransform != nil { - if transValBuf, err = valTransform(valBuf, transValBuf[:0]); err != nil { - return nil, 0, fmt.Errorf("mergeIntoStateFile -valTransform [%x]: %w", valBuf, err) - } - - if err = comp.AddWord(transValBuf); err != nil { - return nil, 0, err - } - } else if valCompressed { - if err = comp.AddWord(valBuf); err != nil { - return nil, 0, err - } - } else { - if err = comp.AddUncompressedWord(valBuf); err != nil { - return nil, 0, err - } - } - //if fType == Storage { - // fmt.Printf("merge %s.%d-%d [%x]=>[%x]\n", fType.String(), startBlock, endBlock, keyBuf, valBuf) - //} - } - - keyBuf = append(keyBuf[:0], lastKey...) - valBuf = append(valBuf[:0], lastVal...) - } - } - if keyBuf != nil { - if err = comp.AddWord(keyBuf); err != nil { - return nil, 0, err - } - if a.trace { - if _, ok := a.tracedKeys[string(keyBuf)]; ok { - fmt.Printf("merge key %x val [%x] into [%d-%d]\n", keyBuf, valBuf, startBlock, endBlock) - } - } - count++ // Only counting keys, not values - if valTransform != nil { - if transValBuf, err = valTransform(valBuf, transValBuf[:0]); err != nil { - return nil, 0, fmt.Errorf("mergeIntoStateFile valTransform [%x]: %w", valBuf, err) - } - if err = comp.AddWord(transValBuf); err != nil { - return nil, 0, err - } - } else if valCompressed { - if err = comp.AddWord(valBuf); err != nil { - return nil, 0, err - } - } else { - if err = comp.AddUncompressedWord(valBuf); err != nil { - return nil, 0, err - } - } - //if fType == Storage { - // fmt.Printf("merge %s.%d-%d [%x]=>[%x]\n", fType.String(), startBlock, endBlock, keyBuf, valBuf) - //} - } - if err = comp.Compress(); err != nil { - return nil, 0, err - } - var d *compress.Decompressor - if d, err = compress.NewDecompressor(datPath); err != nil { - return nil, 0, fmt.Errorf("decompressor: %w", err) - } - return d, count, nil -} - -func (a *Aggregator) stats(fType FileType) (count int, datSize, idxSize int64) { - a.fileLocks[fType].RLock() - defer a.fileLocks[fType].RUnlock() - count = 0 - datSize = 0 - idxSize = 0 - a.files[fType].Ascend(func(i btree.Item) bool { - item := i.(*byEndBlockItem) - if item.decompressor != nil { - count++ - datSize += item.decompressor.Size() - count++ - idxSize += item.index.Size() - } - return true - }) - return -} - -type FilesStats struct { - AccountsCount int - AccountsDatSize int64 - AccountsIdxSize int64 - CodeCount int - CodeDatSize int64 - CodeIdxSize int64 - StorageCount int - StorageDatSize int64 - StorageIdxSize int64 - CommitmentCount int - CommitmentDatSize int64 - CommitmentIdxSize int64 - Hits uint64 - Misses uint64 -} - -func (a *Aggregator) Stats() FilesStats { - var fs FilesStats - fs.AccountsCount, fs.AccountsDatSize, fs.AccountsIdxSize = a.stats(Account) - fs.CodeCount, fs.CodeDatSize, fs.CodeIdxSize = a.stats(Code) - fs.StorageCount, fs.StorageDatSize, fs.StorageIdxSize = a.stats(Storage) - fs.CommitmentCount, fs.CommitmentDatSize, fs.CommitmentIdxSize = a.stats(Commitment) - fs.Hits = atomic.LoadUint64(&a.fileHits) - fs.Misses = atomic.LoadUint64(&a.fileMisses) - return fs -} diff --git a/aggregator/aggregator_test.go 
b/aggregator/aggregator_test.go deleted file mode 100644 index 6a7f396bf..000000000 --- a/aggregator/aggregator_test.go +++ /dev/null @@ -1,314 +0,0 @@ -/* - Copyright 2022 Erigon contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package aggregator - -import ( - "bytes" - "encoding/binary" - "testing" - - "github.com/holiman/uint256" - - "github.com/ledgerwatch/erigon-lib/commitment" - "github.com/ledgerwatch/erigon-lib/kv/memdb" -) - -func int160(i uint64) []byte { - b := make([]byte, 20) - binary.BigEndian.PutUint64(b[12:], i) - return b -} - -func int256(i uint64) []byte { - b := make([]byte, 32) - binary.BigEndian.PutUint64(b[24:], i) - return b -} - -func accountWithBalance(i uint64) []byte { - balance := uint256.NewInt(i) - var l int - l++ - l++ - if i > 0 { - l += balance.ByteLen() - } - l++ - l++ - value := make([]byte, l) - pos := 0 - value[pos] = 0 - pos++ - if balance.IsZero() { - value[pos] = 0 - pos++ - } else { - balanceBytes := balance.ByteLen() - value[pos] = byte(balanceBytes) - pos++ - balance.WriteToSlice(value[pos : pos+balanceBytes]) - pos += balanceBytes - } - value[pos] = 0 - pos++ - value[pos] = 0 - return value -} - -func TestSimpleAggregator(t *testing.T) { - _, rwTx := memdb.NewTestTx(t) - - tmpDir := t.TempDir() - trie := commitment.InitializeTrie(commitment.VariantHexPatriciaTrie) - a, err := NewAggregator(tmpDir, 16, 4, true, true, 1000, trie, rwTx) - if err != nil { - t.Fatal(err) - } - defer a.Close() - w := a.MakeStateWriter(true /* beforeOn */) - if err = w.Reset(0, rwTx); err != nil { - t.Fatal(err) - } - defer w.Close() - var account1 = accountWithBalance(1) - w.UpdateAccountData(int160(1), account1, false /* trace */) - if err = w.FinishTx(0, false); err != nil { - t.Fatal(err) - } - if err = w.Aggregate(false /* trace */); err != nil { - t.Fatal(err) - } - r := a.MakeStateReader(2, rwTx) - acc, err := r.ReadAccountData(int160(1), false /* trace */) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(acc, account1) { - t.Errorf("read account %x, expected account %x", acc, account1) - } - if err = rwTx.Commit(); err != nil { - t.Fatal(err) - } -} - -func TestLoopAggregator(t *testing.T) { - _, rwTx := memdb.NewTestTx(t) - - tmpDir := t.TempDir() - trie := commitment.InitializeTrie(commitment.VariantHexPatriciaTrie) - a, err := NewAggregator(tmpDir, 16, 4, true, true, 1000, trie, rwTx) - if err != nil { - t.Fatal(err) - } - defer a.Close() - var account1 = accountWithBalance(1) - w := a.MakeStateWriter(true /* beforeOn */) - defer w.Close() - for blockNum := uint64(0); blockNum < 1000; blockNum++ { - accountKey := int160(blockNum/10 + 1) - //fmt.Printf("blockNum = %d\n", blockNum) - if err = w.Reset(blockNum, rwTx); err != nil { - t.Fatal(err) - } - w.UpdateAccountData(accountKey, account1, false /* trace */) - if err = w.FinishTx(blockNum, false /* trace */); err != nil { - t.Fatal(err) - } - if err = w.Aggregate(false /* trace */); err != nil { - t.Fatal(err) - } - r := a.MakeStateReader(blockNum+1, rwTx) - acc, err := r.ReadAccountData(accountKey, 
false /* trace */) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(acc, account1) { - t.Errorf("read account %x, expected account %x for block %d", acc, account1, blockNum) - } - account1 = accountWithBalance(blockNum + 2) - } - if err = rwTx.Commit(); err != nil { - t.Fatal(err) - } -} - -func TestRecreateAccountWithStorage(t *testing.T) { - _, rwTx := memdb.NewTestTx(t) - tmpDir := t.TempDir() - - trie := commitment.InitializeTrie(commitment.VariantHexPatriciaTrie) - a, err := NewAggregator(tmpDir, 16, 4, true, true, 1000, trie, rwTx) - if err != nil { - t.Fatal(err) - } - defer a.Close() - accountKey := int160(1) - var account1 = accountWithBalance(1) - var account2 = accountWithBalance(2) - w := a.MakeStateWriter(true /* beforeOn */) - defer w.Close() - for blockNum := uint64(0); blockNum < 100; blockNum++ { - if err = w.Reset(blockNum, rwTx); err != nil { - t.Fatal(err) - } - switch blockNum { - case 1: - w.UpdateAccountData(accountKey, account1, false /* trace */) - for s := uint64(0); s < 100; s++ { - w.WriteAccountStorage(accountKey, int256(s), uint256.NewInt(s+1).Bytes(), false /* trace */) - } - case 22: - w.DeleteAccount(accountKey, false /* trace */) - case 45: - w.UpdateAccountData(accountKey, account2, false /* trace */) - for s := uint64(50); s < 150; s++ { - w.WriteAccountStorage(accountKey, int256(s), uint256.NewInt(2*s+1).Bytes(), false /* trace */) - } - } - if err = w.FinishTx(blockNum, false /* trace */); err != nil { - t.Fatal(err) - } - if err = w.Aggregate(false /* trace */); err != nil { - t.Fatal(err) - } - r := a.MakeStateReader(blockNum+1, rwTx) - switch blockNum { - case 1: - acc, err := r.ReadAccountData(accountKey, false /* trace */) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(account1, acc) { - t.Errorf("wrong account after block %d, expected %x, got %x", blockNum, account1, acc) - } - for s := uint64(0); s < 100; s++ { - v, err := r.ReadAccountStorage(accountKey, int256(s), false /* trace */) - if err != nil { - t.Fatal(err) - } - if !uint256.NewInt(s + 1).Eq(uint256.NewInt(0).SetBytes(v)) { - t.Errorf("wrong storage value after block %d, expected %d, got %d", blockNum, s+1, uint256.NewInt(0).SetBytes(v)) - } - } - case 22, 44: - acc, err := r.ReadAccountData(accountKey, false /* trace */) - if err != nil { - t.Fatal(err) - } - if len(acc) > 0 { - t.Errorf("wrong account after block %d, expected nil, got %x", blockNum, acc) - } - for s := uint64(0); s < 100; s++ { - v, err := r.ReadAccountStorage(accountKey, int256(s), false /* trace */) - if err != nil { - t.Fatal(err) - } - if v != nil { - t.Errorf("wrong storage value after block %d, expected nil, got %d", blockNum, uint256.NewInt(0).SetBytes(v)) - } - } - case 66: - acc, err := r.ReadAccountData(accountKey, false /* trace */) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(account2, acc) { - t.Errorf("wrong account after block %d, expected %x, got %x", blockNum, account1, acc) - } - for s := uint64(0); s < 150; s++ { - v, err := r.ReadAccountStorage(accountKey, int256(s), false /* trace */) - if err != nil { - t.Fatal(err) - } - if s < 50 { - if v != nil { - t.Errorf("wrong storage value after block %d, expected nil, got %d", blockNum, uint256.NewInt(0).SetBytes(v)) - } - } else if v == nil || !uint256.NewInt(2*s+1).Eq(uint256.NewInt(0).SetBytes(v)) { - t.Errorf("wrong storage value after block %d, expected %d, got %d", blockNum, 2*s+1, uint256.NewInt(0).SetBytes(v)) - } - } - } - } - if err = rwTx.Commit(); err != nil { - t.Fatal(err) - } -} - -func TestChangeCode(t *testing.T) 
{ - _, rwTx := memdb.NewTestTx(t) - - tmpDir := t.TempDir() - trie := commitment.InitializeTrie(commitment.VariantHexPatriciaTrie) - a, err := NewAggregator(tmpDir, 16, 4, true, true, 1000, trie, rwTx) - if err != nil { - t.Fatal(err) - } - defer a.Close() - accountKey := int160(1) - var account1 = accountWithBalance(1) - var code1 = []byte("This is the code number 1") - w := a.MakeStateWriter(true /* beforeOn */) - defer w.Close() - for blockNum := uint64(0); blockNum < 100; blockNum++ { - if err = w.Reset(blockNum, rwTx); err != nil { - t.Fatal(err) - } - switch blockNum { - case 1: - w.UpdateAccountData(accountKey, account1, false /* trace */) - w.UpdateAccountCode(accountKey, code1, false /* trace */) - case 25: - w.DeleteAccount(accountKey, false /* trace */) - } - if err = w.FinishTx(blockNum, false /* trace */); err != nil { - t.Fatal(err) - } - if err = w.Aggregate(false /* trace */); err != nil { - t.Fatal(err) - } - r := a.MakeStateReader(blockNum+1, rwTx) - switch blockNum { - case 22: - acc, err := r.ReadAccountData(accountKey, false /* trace */) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(account1, acc) { - t.Errorf("wrong account after block %d, expected %x, got %x", blockNum, account1, acc) - } - code, err := r.ReadAccountCode(accountKey, false /* trace */) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(code1, code) { - t.Errorf("wrong code after block %d, expected %x, got %x", blockNum, code1, code) - } - case 47: - code, err := r.ReadAccountCode(accountKey, false /* trace */) - if err != nil { - t.Fatal(err) - } - if code != nil { - t.Errorf("wrong code after block %d, expected nil, got %x", blockNum, code) - } - } - } - if err = rwTx.Commit(); err != nil { - t.Fatal(err) - } -} diff --git a/aggregator/history.go b/aggregator/history.go deleted file mode 100644 index b45c83462..000000000 --- a/aggregator/history.go +++ /dev/null @@ -1,354 +0,0 @@ -/* - Copyright 2022 Erigon contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package aggregator - -import ( - "encoding/binary" - "fmt" - "io/fs" - "os" - "path" - "regexp" - "strconv" - "strings" - - "github.com/google/btree" - "github.com/holiman/uint256" - "github.com/ledgerwatch/erigon-lib/compress" - "github.com/ledgerwatch/erigon-lib/recsplit" - "github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32" - "github.com/ledgerwatch/log/v3" -) - -// History is a utility class that allows reading history of state -// from state files, history files, and bitmap files produced by an Aggregator -type History struct { - files [NumberOfTypes]*btree.BTreeG[*byEndBlockItem] - diffDir string // Directory where the state diff files are stored - aggregationStep uint64 -} - -func NewHistory(diffDir string, blockTo uint64, aggregationStep uint64) (*History, error) { - h := &History{ - diffDir: diffDir, - aggregationStep: aggregationStep, - } - for fType := FirstType; fType < NumberOfTypes; fType++ { - h.files[fType] = btree.NewG(32, ByEndBlockItemLess) - } - var closeStateFiles = true // It will be set to false in case of success at the end of the function - defer func() { - // Clean up all decompressor and indices upon error - if closeStateFiles { - h.Close() - } - }() - // Scan the diff directory and create the mapping of end blocks to files - files, err := os.ReadDir(diffDir) - if err != nil { - return nil, err - } - h.scanStateFiles(files, blockTo) - for fType := FirstType; fType < NumberOfTypes; fType++ { - if err := h.openFiles(fType); err != nil { - return nil, fmt.Errorf("opening %s state files: %w", fType.String(), err) - } - } - closeStateFiles = false - return h, nil -} - -func (h *History) scanStateFiles(files []fs.DirEntry, blockTo uint64) { - typeStrings := make([]string, NumberOfTypes) - for fType := FileType(0); fType < NumberOfTypes; fType++ { - typeStrings[fType] = fType.String() - } - re := regexp.MustCompile("^(" + strings.Join(typeStrings, "|") + ").([0-9]+)-([0-9]+).(dat|idx)$") - var err error - for _, f := range files { - name := f.Name() - subs := re.FindStringSubmatch(name) - if len(subs) != 5 { - if len(subs) != 0 { - log.Warn("File ignored by history, more than 4 submatches", "name", name, "submatches", len(subs)) - } - continue - } - var startBlock, endBlock uint64 - if startBlock, err = strconv.ParseUint(subs[2], 10, 64); err != nil { - log.Warn("File ignored by history, parsing startBlock", "error", err, "name", name) - continue - } - if endBlock, err = strconv.ParseUint(subs[3], 10, 64); err != nil { - log.Warn("File ignored by history, parsing endBlock", "error", err, "name", name) - continue - } - if startBlock > endBlock { - log.Warn("File ignored by history, startBlock > endBlock", "name", name) - continue - } - if endBlock > blockTo { - // Only load files up to specified block - continue - } - fType, ok := ParseFileType(subs[1]) - if !ok { - log.Warn("File ignored by history, type unknown", "type", subs[1]) - } - var item = &byEndBlockItem{startBlock: startBlock, endBlock: endBlock} - var foundI *byEndBlockItem - h.files[fType].AscendGreaterOrEqual(&byEndBlockItem{startBlock: endBlock, endBlock: endBlock}, func(it *byEndBlockItem) bool { - if it.endBlock == endBlock { - foundI = it - } - return false - }) - if foundI == nil || foundI.startBlock > startBlock { - h.files[fType].ReplaceOrInsert(item) - log.Info("Load file", "name", name, "type", fType.String(), "endBlock", item.endBlock) - } - } -} - -func (h *History) openFiles(fType FileType) error { - var err error - h.files[fType].Ascend(func(item *byEndBlockItem) bool { - if 
item.decompressor, err = compress.NewDecompressor(path.Join(h.diffDir, fmt.Sprintf("%s.%d-%d.dat", fType.String(), item.startBlock, item.endBlock))); err != nil { - return false - } - if item.index, err = recsplit.OpenIndex(path.Join(h.diffDir, fmt.Sprintf("%s.%d-%d.idx", fType.String(), item.startBlock, item.endBlock))); err != nil { - return false - } - item.getter = item.decompressor.MakeGetter() - item.getterMerge = item.decompressor.MakeGetter() - item.indexReader = recsplit.NewIndexReader(item.index) - item.readerMerge = recsplit.NewIndexReader(item.index) - return true - }) - return err -} - -func (h *History) closeFiles(fType FileType) { - h.files[fType].Ascend(func(item *byEndBlockItem) bool { - if item.decompressor != nil { - item.decompressor.Close() - } - if item.index != nil { - item.index.Close() - } - return true - }) -} - -func (h *History) Close() { - // Closing state files only after background aggregation goroutine is finished - for fType := FirstType; fType < NumberOfTypes; fType++ { - h.closeFiles(fType) - } -} - -func (h *History) MakeHistoryReader() *HistoryReader { - r := &HistoryReader{ - h: h, - } - return r -} - -type HistoryReader struct { - h *History - search byEndBlockItem - blockNum uint64 - txNum uint64 - lastTx bool // Whether it is the last transaction in the block -} - -func (hr *HistoryReader) SetNums(blockNum, txNum uint64, lastTx bool) { - hr.blockNum = blockNum - hr.txNum = txNum - hr.lastTx = lastTx -} - -func (hr *HistoryReader) searchInHistory(bitmapType, historyType FileType, key []byte, trace bool) (bool, []byte, error) { - if trace { - fmt.Printf("searchInHistory %s %s [%x] blockNum %d, txNum %d\n", bitmapType.String(), historyType.String(), key, hr.blockNum, hr.txNum) - } - searchBlock := hr.blockNum - if hr.lastTx { - searchBlock++ - } - searchTx := hr.txNum - hr.search.endBlock = searchBlock - hr.search.startBlock = searchBlock - (searchBlock % 500_000) - var eliasVal []byte - var err error - var found bool - var foundTxNum uint64 - var foundEndBlock uint64 - hr.h.files[bitmapType].AscendGreaterOrEqual(&hr.search, func(item *byEndBlockItem) bool { - offset := item.indexReader.Lookup(key) - g := item.getter - g.Reset(offset) - if keyMatch, _ := g.Match(key); keyMatch { - if trace { - fmt.Printf("Found bitmap for [%x] in %s.[%d-%d]\n", key, bitmapType.String(), item.startBlock, item.endBlock) - } - eliasVal, _ = g.NextUncompressed() - ef, _ := eliasfano32.ReadEliasFano(eliasVal) - it := ef.Iterator() - if trace { - for it.HasNext() { - v, _ := it.Next() - fmt.Printf(" %d", v) - } - fmt.Printf("\n") - } - foundTxNum, found = ef.Search(searchTx) - if found { - foundEndBlock = item.endBlock - return false - } - } - // Not found, next - return true - }) - if err != nil { - return false, nil, err - } - if !found { - return false, nil, nil - } - if trace { - fmt.Printf("found in tx %d, endBlock %d\n", foundTxNum, foundEndBlock) - } - var lookupKey = make([]byte, len(key)+8) - binary.BigEndian.PutUint64(lookupKey, foundTxNum) - copy(lookupKey[8:], key) - var historyItem *byEndBlockItem - hr.search.endBlock = foundEndBlock - hr.search.startBlock = foundEndBlock - 499_999 - var ok bool - historyItem, ok = hr.h.files[historyType].Get(&hr.search) - if !ok || historyItem == nil { - return false, nil, fmt.Errorf("no %s file found for %d", historyType.String(), foundEndBlock) - } - offset := historyItem.indexReader.Lookup(lookupKey) - if trace { - fmt.Printf("Lookup [%x] in %s.[%d-%d].idx = %d\n", lookupKey, historyType.String(), historyItem.startBlock, 
historyItem.endBlock, offset) - } - historyItem.getter.Reset(offset) - v, _ := historyItem.getter.Next(nil) - return true, v, nil -} - -func (hr *HistoryReader) ReadAccountData(addr []byte, trace bool) ([]byte, error) { - // Look in the history first - hOk, v, err := hr.searchInHistory(AccountBitmap, AccountHistory, addr, trace) - if err != nil { - return nil, err - } - if hOk { - if trace { - fmt.Printf("ReadAccountData %x, found in history [%x]\n", addr, v) - } - return v, nil - } - if trace { - fmt.Printf("ReadAccountData %x, not found in history, get from the state\n", addr) - } - // Not found in history - look in the state files - return hr.h.readFromFiles(Account, addr, trace), nil -} - -func (hr *HistoryReader) ReadAccountStorage(addr []byte, loc []byte, trace bool) (*uint256.Int, error) { - // Look in the history first - dbkey := make([]byte, len(addr)+len(loc)) - copy(dbkey[0:], addr) - copy(dbkey[len(addr):], loc) - hOk, v, err := hr.searchInHistory(StorageBitmap, StorageHistory, dbkey, trace) - if err != nil { - return nil, err - } - if hOk { - return new(uint256.Int).SetBytes(v), nil - } - // Not found in history, look in the state files - v = hr.h.readFromFiles(Storage, dbkey, trace) - if v != nil { - return new(uint256.Int).SetBytes(v), nil - } - return nil, nil -} - -func (hr *HistoryReader) ReadAccountCode(addr []byte, trace bool) ([]byte, error) { - // Look in the history first - hOk, v, err := hr.searchInHistory(CodeBitmap, CodeHistory, addr, false) - if err != nil { - return nil, err - } - if hOk { - return v, err - } - // Not found in history, look in the history files - return hr.h.readFromFiles(Code, addr, trace), nil -} - -func (hr *HistoryReader) ReadAccountCodeSize(addr []byte, trace bool) (int, error) { - // Look in the history first - hOk, v, err := hr.searchInHistory(CodeBitmap, CodeHistory, addr, false) - if err != nil { - return 0, err - } - if hOk { - return len(v), err - } - // Not found in history, look in the history files - return len(hr.h.readFromFiles(Code, addr, trace)), nil -} - -func (h *History) readFromFiles(fType FileType, filekey []byte, trace bool) []byte { - var val []byte - h.files[fType].Descend(func(item *byEndBlockItem) bool { - if trace { - fmt.Printf("read %s %x: search in file [%d-%d]\n", fType.String(), filekey, item.startBlock, item.endBlock) - } - if item.tree != nil { - ai, ok := item.tree.Get(&AggregateItem{k: filekey}) - if !ok || ai == nil { - return true - } - val = ai.v - return false - } - if item.index.Empty() { - return true - } - offset := item.indexReader.Lookup(filekey) - g := item.getter - g.Reset(offset) - if g.HasNext() { - if keyMatch, _ := g.Match(filekey); keyMatch { - val, _ = g.Next(nil) - if trace { - fmt.Printf("read %s %x: found [%x] in file [%d-%d]\n", fType.String(), filekey, val, item.startBlock, item.endBlock) - } - return false - } - } - return true - }) - return val -} diff --git a/state/aggregator_test.go b/state/aggregator_test.go index e7f387624..65e9ba1cf 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -505,7 +505,9 @@ func pivotKeysFromKV(dataPath string) ([][]byte, error) { return listing, nil } -func generateCompressedKV(t testing.TB, tmp string, keySize, valueSize, keyCount int) string { +func generateCompressedKV(tb testing.TB, tmp string, keySize, valueSize, keyCount int) string { + tb.Helper() + args := BtIndexWriterArgs{ IndexFile: path.Join(tmp, fmt.Sprintf("%dk.bt", keyCount/1000)), TmpDir: tmp, @@ -513,7 +515,7 @@ func generateCompressedKV(t testing.TB, tmp string, 
keySize, valueSize, keyCount } iw, err := NewBtIndexWriter(args) - require.NoError(t, err) + require.NoError(tb, err) defer iw.Close() rnd := rand.New(rand.NewSource(0)) @@ -521,30 +523,30 @@ func generateCompressedKV(t testing.TB, tmp string, keySize, valueSize, keyCount dataPath := path.Join(tmp, fmt.Sprintf("%dk.kv", keyCount/1000)) comp, err := compress.NewCompressor(context.Background(), "cmp", dataPath, tmp, compress.MinPatternScore, 1, log.LvlDebug) - require.NoError(t, err) + require.NoError(tb, err) for i := 0; i < keyCount; i++ { key := make([]byte, keySize) n, err := rnd.Read(key[:]) - require.EqualValues(t, keySize, n) + require.EqualValues(tb, keySize, n) binary.BigEndian.PutUint64(key[keySize-8:], uint64(i)) - require.NoError(t, err) + require.NoError(tb, err) err = comp.AddWord(key[:]) - require.NoError(t, err) + require.NoError(tb, err) n, err = rnd.Read(values[:rnd.Intn(valueSize)+1]) - require.NoError(t, err) + require.NoError(tb, err) err = comp.AddWord(values[:n]) - require.NoError(t, err) + require.NoError(tb, err) } err = comp.Compress() - require.NoError(t, err) + require.NoError(tb, err) comp.Close() decomp, err := compress.NewDecompressor(dataPath) - require.NoError(t, err) + require.NoError(tb, err) getter := decomp.MakeGetter() getter.Reset(0) @@ -553,19 +555,19 @@ func generateCompressedKV(t testing.TB, tmp string, keySize, valueSize, keyCount key := make([]byte, keySize) for i := 0; i < keyCount; i++ { if !getter.HasNext() { - t.Fatalf("not enough values at %d", i) + tb.Fatalf("not enough values at %d", i) break } keys, _ := getter.Next(key[:0]) - err = iw.AddKey(keys[:], uint64(pos)) + err = iw.AddKey(keys[:], pos) pos = getter.Skip() - require.NoError(t, err) + require.NoError(tb, err) } decomp.Close() - require.NoError(t, iw.Build()) + require.NoError(tb, iw.Build()) iw.Close() return decomp.FilePath() diff --git a/state/btree_index.go b/state/btree_index.go index cd7d00108..dd7129016 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -34,13 +34,6 @@ func min64(a, b uint64) uint64 { return b } -func max64(a, b uint64) uint64 { - if a > b { - return a - } - return b -} - type markupCursor struct { l, p, di, si uint64 //l - level @@ -172,7 +165,7 @@ func newBtAlloc(k, M uint64, trace bool) *btAlloc { a.N = ncount if trace { - fmt.Printf("ncount=%d ∂%.5f\n", ncount, float64(a.N-uint64(k))/float64(a.N)) + fmt.Printf("ncount=%d ∂%.5f\n", ncount, float64(a.N-k)/float64(a.N)) for i, v := range a.sons { fmt.Printf("L%d=%v\n", i, v) } @@ -181,6 +174,8 @@ func newBtAlloc(k, M uint64, trace bool) *btAlloc { return a } +// nolint +// another implementation of traverseDfs supposed to be a bit cleaner but buggy yet func (a *btAlloc) traverseTrick() { for l := 0; l < len(a.sons)-1; l++ { if len(a.sons[l]) < 2 { @@ -330,6 +325,7 @@ func (a *btAlloc) traverseDfs() { a.cursors[c.l] = c a.cursors[pc.l] = pc + //nolint for l := pc.l; l >= 0; l-- { pc := a.cursors[l] uncles := a.sons[pc.l][pc.p] @@ -460,7 +456,7 @@ func (a *btAlloc) Seek(ik []byte) (*Cursor, error) { var ( lm, rm int64 L, R = uint64(0), uint64(len(a.nodes[0]) - 1) - minD, maxD = uint64(0), uint64(a.K) + minD, maxD = uint64(0), a.K ln node ) @@ -608,9 +604,7 @@ type BtIndexWriter struct { lvl log.Lvl maxOffset uint64 prevOffset uint64 - delta uint64 minDelta uint64 - batchSizeLimit uint64 indexW *bufio.Writer indexF *os.File bucketCollector *etl.Collector // Collector that sorts by buckets @@ -676,7 +670,6 @@ func (btw *BtIndexWriter) loadFuncBucket(k, v []byte, _ etl.CurrentTableReader, return nil } 
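Both the test above and the index builders below walk a .kv file in which key and value words alternate, pairing each key with the offset it starts at before handing it to AddKey. A condensed sketch of that walk, using only the Getter calls that appear in this patch (NewDecompressor, MakeGetter, Reset, HasNext, Next, Skip); the function name, file path and logging are illustrative assumptions.

package main

import (
	"fmt"

	"github.com/ledgerwatch/erigon-lib/compress"
)

// walkKV iterates a .kv file of alternating key/value words and reports
// each key together with the file offset at which that key word starts.
func walkKV(dataPath string) error {
	d, err := compress.NewDecompressor(dataPath)
	if err != nil {
		return err
	}
	defer d.Close()

	g := d.MakeGetter()
	g.Reset(0)
	var key []byte
	var pos uint64
	for g.HasNext() {
		key, _ = g.Next(key[:0]) // read the key word
		fmt.Printf("key %x stored at offset %d\n", key, pos)
		pos = g.Skip() // skip the value word; Skip returns the new offset
	}
	return nil
}

func main() {
	if err := walkKV("2k.kv"); err != nil {
		fmt.Println(err)
	}
}
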
- // Build has to be called after all the keys have been added, and it initiates the process // of building the perfect hash function and writing index into a file func (btw *BtIndexWriter) Build() error { @@ -761,19 +754,17 @@ func (btw *BtIndexWriter) AddKey(key []byte, offset uint64) error { } type BtIndex struct { - alloc *btAlloc - mmapWin *[mmap.MaxMapSize]byte - mmapUnix []byte - data []byte - file *os.File - size int64 - modTime time.Time - filePath string - keyCount uint64 - baseDataID uint64 - bytesPerRec int - dataoffset uint64 - + alloc *btAlloc + mmapWin *[mmap.MaxMapSize]byte + mmapUnix []byte + data []byte + file *os.File + size int64 + modTime time.Time + filePath string + keyCount uint64 + bytesPerRec int + dataoffset uint64 auxBuf []byte decompressor *compress.Decompressor getter *compress.Getter @@ -808,7 +799,7 @@ func BuildBtreeIndexWithDecompressor(indexPath string, kv *compress.Decompressor emptys := 0 for getter.HasNext() { key, kp := getter.Next(key[:0]) - err = iw.AddKey(key[:], uint64(pos)) + err = iw.AddKey(key[:], pos) if err != nil { return err } @@ -853,7 +844,7 @@ func BuildBtreeIndex(dataPath, indexPath string) error { var pos uint64 for getter.HasNext() { key, _ := getter.Next(key[:0]) - err = iw.AddKey(key[:], uint64(pos)) + err = iw.AddKey(key[:], pos) if err != nil { return err } diff --git a/state/domain.go b/state/domain.go index b06b95de3..4aa276a88 100644 --- a/state/domain.go +++ b/state/domain.go @@ -168,8 +168,8 @@ func NewDomain( keysTable: keysTable, valsTable: valsTable, //prefixLen: prefixLen, - files: btree2.NewBTreeGOptions[*filesItem](filesItemLess, btree2.Options{Degree: 128, NoLocks: false}), - roFiles: *atomic2.NewPointer(&[]ctxItem{}), + files: btree2.NewBTreeGOptions[*filesItem](filesItemLess, btree2.Options{Degree: 128, NoLocks: false}), + roFiles: *atomic2.NewPointer(&[]ctxItem{}), } var err error @@ -576,8 +576,7 @@ type DomainContext struct { d *Domain files []ctxItem getters []*compress.Getter - bts []*BtIndex - readers []*recsplit.IndexReader + readers []*BtIndex hc *HistoryContext keyBuf [60]byte // 52b key and 8b for inverted step numBuf [8]byte @@ -596,28 +595,17 @@ func (dc *DomainContext) statelessGetter(i int) *compress.Getter { } func (dc *DomainContext) statelessBtree(i int) *BtIndex { - if dc.bts == nil { - dc.bts = make([]*BtIndex, len(dc.files)) - } - r := dc.bts[i] - if r == nil { - r = dc.files[i].src.bindex - dc.bts[i] = r - } - return r -} - -func (dc *DomainContext) statelessIdxReader(i int) *recsplit.IndexReader { if dc.readers == nil { - dc.readers = make([]*recsplit.IndexReader, len(dc.files)) + dc.readers = make([]*BtIndex, len(dc.files)) } r := dc.readers[i] if r == nil { - r = recsplit.NewIndexReader(dc.files[i].src.index) + r = dc.files[i].src.bindex dc.readers[i] = r } return r } + func (d *Domain) collectFilesStats() (datsz, idxsz, files uint64) { d.History.files.Walk(func(items []*filesItem) bool { for _, item := range items { diff --git a/state/domain_committed.go b/state/domain_committed.go index 8304e46f4..c7b810ced 100644 --- a/state/domain_committed.go +++ b/state/domain_committed.go @@ -237,6 +237,7 @@ func (d *DomainCommitted) replaceKeyWithReference(fullKey, shortKey []byte, type return found } +// nolint func (d *DomainCommitted) lookupShortenedKey(shortKey, fullKey []byte, typAS string, list []*filesItem) bool { fileStep, offset := shortenedKey(shortKey) expected := uint64(fileStep) * d.aggregationStep @@ -248,6 +249,7 @@ func (d *DomainCommitted) lookupShortenedKey(shortKey, fullKey []byte, 
typAS str } cur := item.bindex.OrdinalLookup(offset) + //nolint fullKey = cur.Key() if d.trace { fmt.Printf("offsetToKey %s [%x]=>{%x} step=%d offset=%d, file=%s.%d-%d.kv\n", typAS, fullKey, shortKey, fileStep, offset, typAS, item.startTxNum, item.endTxNum) From 2a49aaf66d05bda1ba2b7c9c5c60d867bad6db19 Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 22 Feb 2023 13:46:39 +0000 Subject: [PATCH 22/54] aggregator: remove code for support 'prefixLen' --- state/aggregator.go | 8 ++++---- state/aggregator_bench_test.go | 4 ++-- state/aggregator_test.go | 10 +++++----- state/domain.go | 22 ++-------------------- state/domain_committed.go | 28 ---------------------------- state/domain_test.go | 28 +++++++++++----------------- 6 files changed, 24 insertions(+), 76 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 7c4c90d03..de0293de9 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -78,17 +78,17 @@ func NewAggregator( if err != nil { return nil, err } - if a.accounts, err = NewDomain(dir, tmpdir, aggregationStep, "accounts", kv.AccountKeys, kv.AccountVals, kv.AccountHistoryKeys, kv.AccountHistoryVals, kv.AccountSettings, kv.AccountIdx, 0 /* prefixLen */, false /* compressVals */); err != nil { + if a.accounts, err = NewDomain(dir, tmpdir, aggregationStep, "accounts", kv.AccountKeys, kv.AccountVals, kv.AccountHistoryKeys, kv.AccountHistoryVals, kv.AccountSettings, kv.AccountIdx, false /* compressVals */); err != nil { return nil, err } - if a.storage, err = NewDomain(dir, tmpdir, aggregationStep, "storage", kv.StorageKeys, kv.StorageVals, kv.StorageHistoryKeys, kv.StorageHistoryVals, kv.StorageSettings, kv.StorageIdx, 20 /* prefixLen */, false /* compressVals */); err != nil { + if a.storage, err = NewDomain(dir, tmpdir, aggregationStep, "storage", kv.StorageKeys, kv.StorageVals, kv.StorageHistoryKeys, kv.StorageHistoryVals, kv.StorageSettings, kv.StorageIdx, false /* compressVals */); err != nil { return nil, err } - if a.code, err = NewDomain(dir, tmpdir, aggregationStep, "code", kv.CodeKeys, kv.CodeVals, kv.CodeHistoryKeys, kv.CodeHistoryVals, kv.CodeSettings, kv.CodeIdx, 0 /* prefixLen */, true /* compressVals */); err != nil { + if a.code, err = NewDomain(dir, tmpdir, aggregationStep, "code", kv.CodeKeys, kv.CodeVals, kv.CodeHistoryKeys, kv.CodeHistoryVals, kv.CodeSettings, kv.CodeIdx, true /* compressVals */); err != nil { return nil, err } - commitd, err := NewDomain(dir, tmpdir, aggregationStep, "commitment", kv.CommitmentKeys, kv.CommitmentVals, kv.CommitmentHistoryKeys, kv.CommitmentHistoryVals, kv.CommitmentSettings, kv.CommitmentIdx, 0 /* prefixLen */, false /* compressVals */) + commitd, err := NewDomain(dir, tmpdir, aggregationStep, "commitment", kv.CommitmentKeys, kv.CommitmentVals, kv.CommitmentHistoryKeys, kv.CommitmentHistoryVals, kv.CommitmentSettings, kv.CommitmentIdx, false /* compressVals */) if err != nil { return nil, err } diff --git a/state/aggregator_bench_test.go b/state/aggregator_bench_test.go index d5a985fc3..92410a23a 100644 --- a/state/aggregator_bench_test.go +++ b/state/aggregator_bench_test.go @@ -18,7 +18,7 @@ import ( "github.com/ledgerwatch/erigon-lib/kv/mdbx" ) -func testDbAndAggregatorBench(b *testing.B, prefixLen int, aggStep uint64) (string, kv.RwDB, *Aggregator) { +func testDbAndAggregatorBench(b *testing.B, aggStep uint64) (string, kv.RwDB, *Aggregator) { b.Helper() path := b.TempDir() b.Cleanup(func() { os.RemoveAll(path) }) @@ -41,7 +41,7 @@ func BenchmarkAggregator_Processing(b *testing.B) { vals := queueKeys(ctx, 
53, length.Hash) aggStep := uint64(100_00) - _, db, agg := testDbAndAggregatorBench(b, length.Addr, aggStep) + _, db, agg := testDbAndAggregatorBench(b, aggStep) tx, err := db.BeginRw(ctx) require.NoError(b, err) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 0f69a2c28..7f9a5d8d5 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -25,7 +25,7 @@ import ( "github.com/ledgerwatch/erigon-lib/recsplit" ) -func testDbAndAggregator(t *testing.T, prefixLen int, aggStep uint64) (string, kv.RwDB, *Aggregator) { +func testDbAndAggregator(t *testing.T, aggStep uint64) (string, kv.RwDB, *Aggregator) { t.Helper() path := t.TempDir() t.Cleanup(func() { os.RemoveAll(path) }) @@ -41,7 +41,7 @@ func testDbAndAggregator(t *testing.T, prefixLen int, aggStep uint64) (string, k } func TestAggregator_Merge(t *testing.T) { - _, db, agg := testDbAndAggregator(t, 0, 100) + _, db, agg := testDbAndAggregator(t, 100) tx, err := db.BeginRwNosync(context.Background()) require.NoError(t, err) @@ -123,7 +123,7 @@ func TestAggregator_Merge(t *testing.T) { // - new aggregator SeekCommitment must return txNum equal to amount of total txns func TestAggregator_RestartOnDatadir(t *testing.T) { aggStep := uint64(50) - path, db, agg := testDbAndAggregator(t, 0, aggStep) + path, db, agg := testDbAndAggregator(t, aggStep) tx, err := db.BeginRw(context.Background()) require.NoError(t, err) @@ -219,7 +219,7 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { func TestAggregator_RestartOnFiles(t *testing.T) { aggStep := uint64(100) - path, db, agg := testDbAndAggregator(t, 0, aggStep) + path, db, agg := testDbAndAggregator(t, aggStep) defer db.Close() _ = path @@ -324,7 +324,7 @@ func TestAggregator_RestartOnFiles(t *testing.T) { func TestAggregator_ReplaceCommittedKeys(t *testing.T) { aggStep := uint64(10000) - path, db, agg := testDbAndAggregator(t, 0, aggStep) + path, db, agg := testDbAndAggregator(t, aggStep) defer db.Close() _ = path diff --git a/state/domain.go b/state/domain.go index bcfcc6a7e..b7f4c945d 100644 --- a/state/domain.go +++ b/state/domain.go @@ -145,7 +145,6 @@ type Domain struct { keysTable string // key -> invertedStep , invertedStep = ^(txNum / aggregationStep), Needs to be table with DupSort valsTable string // key + invertedStep -> values stats DomainStats - prefixLen int // Number of bytes in the keys that can be used for prefix iteration mergesCount uint64 } @@ -159,15 +158,13 @@ func NewDomain( historyValsTable string, settingsTable string, indexTable string, - prefixLen int, compressVals bool, ) (*Domain, error) { d := &Domain{ keysTable: keysTable, valsTable: valsTable, - //prefixLen: prefixLen, - files: btree2.NewBTreeGOptions[*filesItem](filesItemLess, btree2.Options{Degree: 128, NoLocks: false}), - roFiles: *atomic2.NewPointer(&[]ctxItem{}), + files: btree2.NewBTreeGOptions[*filesItem](filesItemLess, btree2.Options{Degree: 128, NoLocks: false}), + roFiles: *atomic2.NewPointer(&[]ctxItem{}), } var err error @@ -706,14 +703,10 @@ func (dc *DomainContext) Close() { } // IteratePrefix iterates over key-value pairs of the domain that start with given prefix -// The length of the prefix has to match the `prefixLen` parameter used to create the domain // Such iteration is not intended to be used in public API, therefore it uses read-write transaction // inside the domain. Another version of this for public API use needs to be created, that uses // roTx instead and supports ending the iterations before it reaches the end. 
func (dc *DomainContext) IteratePrefix(prefix []byte, it func(k, v []byte)) error { - //if len(prefix) != dc.d.prefixLen { - // return fmt.Errorf("wrong prefix length, this %s domain supports prefixLen %d, given [%x]", dc.d.filenameBase, dc.d.prefixLen, prefix) - //} atomic.AddUint64(&dc.d.stats.HistoryQueries, 1) var cp CursorHeap @@ -985,7 +978,6 @@ func (d *Domain) collate(ctx context.Context, step, txFrom, txTo uint64, roTx kv defer keysCursor.Close() var ( - //prefix []byte // Track prefix to insert it before entries k, v []byte pos uint64 valuesCount uint @@ -1020,16 +1012,6 @@ func (d *Domain) collate(ctx context.Context, step, txFrom, txTo uint64, roTx kv if err != nil { return Collation{}, fmt.Errorf("find last %s value for aggregation step k=[%x]: %w", d.filenameBase, k, err) } - //if d.prefixLen > 0 && (prefix == nil || !bytes.HasPrefix(k, prefix)) { - // prefix = append(prefix[:0], k[:d.prefixLen]...) - // if err = valuesComp.AddUncompressedWord(prefix); err != nil { - // return Collation{}, fmt.Errorf("add %s values prefix [%x]: %w", d.filenameBase, prefix, err) - // } - // if err = valuesComp.AddUncompressedWord(nil); err != nil { - // return Collation{}, fmt.Errorf("add %s values prefix val [%x]: %w", d.filenameBase, prefix, err) - // } - // valuesCount++ - //} if err = valuesComp.AddUncompressedWord(k); err != nil { return Collation{}, fmt.Errorf("add %s values key [%x]: %w", d.filenameBase, k, err) } diff --git a/state/domain_committed.go b/state/domain_committed.go index dbc91c712..58af4e7e6 100644 --- a/state/domain_committed.go +++ b/state/domain_committed.go @@ -429,38 +429,10 @@ func (d *DomainCommitted) mergeFiles(ctx context.Context, oldFiles SelectedStati heap.Pop(&cp) } } - //var skip bool - //if d.prefixLen > 0 { - // skip = r.valuesStartTxNum == 0 && len(lastVal) == 0 && len(lastKey) != d.prefixLen - //} else { // For the rest of types, empty value means deletion skip := r.valuesStartTxNum == 0 && len(lastVal) == 0 //} if !skip { - //if keyBuf != nil && (d.prefixLen == 0 || len(keyBuf) != d.prefixLen || bytes.HasPrefix(lastKey, keyBuf)) { - // if err = comp.AddUncompressedWord(keyBuf); err != nil { - // return nil, nil, nil, err - // } - // keyCount++ // Only counting keys, not values - // - // if d.trace { - // fmt.Printf("merge: multi-way key %x, total keys %d\n", keyBuf, keyCount) - // } - // - // valBuf, err = d.commitmentValTransform(&oldFiles, &mergedFiles, valBuf) - // if err != nil { - // return nil, nil, nil, fmt.Errorf("merge: valTransform [%x] %w", valBuf, err) - // } - // if d.compressVals { - // if err = comp.AddWord(valBuf); err != nil { - // return nil, nil, nil, err - // } - // } else { - // if err = comp.AddUncompressedWord(valBuf); err != nil { - // return nil, nil, nil, err - // } - // } - //} keyBuf = append(keyBuf[:0], lastKey...) valBuf = append(valBuf[:0], lastVal...) 
} diff --git a/state/domain_test.go b/state/domain_test.go index d95e792f1..74d0dbf83 100644 --- a/state/domain_test.go +++ b/state/domain_test.go @@ -35,7 +35,7 @@ import ( "github.com/ledgerwatch/erigon-lib/recsplit" ) -func testDbAndDomain(t *testing.T, prefixLen int) (string, kv.RwDB, *Domain) { +func testDbAndDomain(t *testing.T) (string, kv.RwDB, *Domain) { t.Helper() path := t.TempDir() t.Cleanup(func() { os.RemoveAll(path) }) @@ -57,7 +57,7 @@ func testDbAndDomain(t *testing.T, prefixLen int) (string, kv.RwDB, *Domain) { } }).MustOpen() t.Cleanup(db.Close) - d, err := NewDomain(path, path, 16 /* aggregationStep */, "base" /* filenameBase */, keysTable, valsTable, historyKeysTable, historyValsTable, settingsTable, indexTable, prefixLen, true /* compressVals */) + d, err := NewDomain(path, path, 16 /* aggregationStep */, "base" /* filenameBase */, keysTable, valsTable, historyKeysTable, historyValsTable, settingsTable, indexTable, true /* compressVals */) require.NoError(t, err) t.Cleanup(d.Close) return path, db, d @@ -67,7 +67,7 @@ func testDbAndDomain(t *testing.T, prefixLen int) (string, kv.RwDB, *Domain) { func TestCollationBuild(t *testing.T) { logEvery := time.NewTicker(30 * time.Second) defer logEvery.Stop() - _, db, d := testDbAndDomain(t, 0 /* prefixLen */) + _, db, d := testDbAndDomain(t) ctx := context.Background() tx, err := db.BeginRw(ctx) @@ -127,7 +127,7 @@ func TestCollationBuild(t *testing.T) { } func TestIterationBasic(t *testing.T) { - _, db, d := testDbAndDomain(t, 5 /* prefixLen */) + _, db, d := testDbAndDomain(t) ctx := context.Background() tx, err := db.BeginRw(ctx) require.NoError(t, err) @@ -167,7 +167,7 @@ func TestIterationBasic(t *testing.T) { func TestAfterPrune(t *testing.T) { logEvery := time.NewTicker(30 * time.Second) defer logEvery.Stop() - _, db, d := testDbAndDomain(t, 0 /* prefixLen */) + _, db, d := testDbAndDomain(t) ctx := context.Background() tx, err := db.BeginRw(ctx) @@ -241,7 +241,7 @@ func TestAfterPrune(t *testing.T) { func filledDomain(t *testing.T) (string, kv.RwDB, *Domain, uint64) { t.Helper() - path, db, d := testDbAndDomain(t, 0 /* prefixLen */) + path, db, d := testDbAndDomain(t) ctx := context.Background() tx, err := db.BeginRw(ctx) require.NoError(t, err) @@ -348,7 +348,7 @@ func TestHistory(t *testing.T) { func TestIterationMultistep(t *testing.T) { logEvery := time.NewTicker(30 * time.Second) defer logEvery.Stop() - _, db, d := testDbAndDomain(t, 5 /* prefixLen */) + _, db, d := testDbAndDomain(t) ctx := context.Background() tx, err := db.BeginRw(ctx) require.NoError(t, err) @@ -504,20 +504,14 @@ func TestScanFiles(t *testing.T) { txNum := d.txNum d.closeWhatNotInList([]string{}) d.OpenFolder() - //d.Close() - // - //var err error - //d, err = NewDomain(path, path, d.aggregationStep, d.filenameBase, d.keysTable, d.valsTable, d.indexKeysTable, d.historyValsTable, d.settingsTable, d.indexTable, d.prefixLen, d.compressVals) - //require.NoError(t, err) - //require.NoError(t, d.OpenFolder()) - //defer d.Close() + d.SetTxNum(txNum) // Check the history checkHistory(t, db, d, txs) } func TestDelete(t *testing.T) { - _, db, d := testDbAndDomain(t, 0 /* prefixLen */) + _, db, d := testDbAndDomain(t) ctx := context.Background() tx, err := db.BeginRw(ctx) require.NoError(t, err) @@ -559,7 +553,7 @@ func TestDelete(t *testing.T) { func filledDomainFixedSize(t *testing.T, keysCount, txCount uint64) (string, kv.RwDB, *Domain, map[string][]bool) { t.Helper() - path, db, d := testDbAndDomain(t, 0 /* prefixLen */) + path, db, d := 
testDbAndDomain(t) ctx := context.Background() tx, err := db.BeginRw(ctx) require.NoError(t, err) @@ -658,7 +652,7 @@ func TestDomain_Prune_AfterAllWrites(t *testing.T) { func TestDomain_PruneOnWrite(t *testing.T) { keysCount, txCount := uint64(16), uint64(64) - path, db, d := testDbAndDomain(t, 0 /* prefixLen */) + path, db, d := testDbAndDomain(t) ctx := context.Background() defer os.Remove(path) From b020c814c760c30211618f5f3307afa24ff36bd0 Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 22 Feb 2023 16:36:27 +0000 Subject: [PATCH 23/54] aggregator: add trie variant selection --- state/aggregator.go | 8 ++----- state/aggregator_bench_test.go | 3 ++- state/aggregator_test.go | 7 +++--- state/domain_committed.go | 39 +++++++++++++++++++++++++--------- 4 files changed, 37 insertions(+), 20 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index de0293de9..b409803ac 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -61,11 +61,7 @@ type Aggregator struct { defaultCtx *AggregatorContext } -func NewAggregator( - dir, tmpdir string, - aggregationStep uint64, -) (*Aggregator, error) { - +func NewAggregator(dir, tmpdir string, aggregationStep uint64, commitmentMode CommitmentMode, commitTrieVariant commitment.TrieVariant) (*Aggregator, error) { a := &Aggregator{aggregationStep: aggregationStep, tmpdir: tmpdir, stepDoneNotice: make(chan [length.Hash]byte, 1)} closeAgg := true @@ -92,7 +88,7 @@ func NewAggregator( if err != nil { return nil, err } - a.commitment = NewCommittedDomain(commitd, CommitmentModeDirect) + a.commitment = NewCommittedDomain(commitd, commitmentMode, commitTrieVariant) if a.logAddrs, err = NewInvertedIndex(dir, tmpdir, aggregationStep, "logaddrs", kv.LogAddressKeys, kv.LogAddressIdx, false, nil); err != nil { return nil, err diff --git a/state/aggregator_bench_test.go b/state/aggregator_bench_test.go index 92410a23a..3abd4ed45 100644 --- a/state/aggregator_bench_test.go +++ b/state/aggregator_bench_test.go @@ -13,6 +13,7 @@ import ( "github.com/ledgerwatch/log/v3" "github.com/stretchr/testify/require" + "github.com/ledgerwatch/erigon-lib/commitment" "github.com/ledgerwatch/erigon-lib/common/length" "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/erigon-lib/kv/mdbx" @@ -27,7 +28,7 @@ func testDbAndAggregatorBench(b *testing.B, aggStep uint64) (string, kv.RwDB, *A return kv.ChaindataTablesCfg }).MustOpen() b.Cleanup(db.Close) - agg, err := NewAggregator(path, path, aggStep) + agg, err := NewAggregator(path, path, aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie) require.NoError(b, err) b.Cleanup(agg.Close) return path, db, agg diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 7f9a5d8d5..355aa0fdd 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -17,6 +17,7 @@ import ( "github.com/ledgerwatch/log/v3" "github.com/stretchr/testify/require" + "github.com/ledgerwatch/erigon-lib/commitment" "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/length" "github.com/ledgerwatch/erigon-lib/compress" @@ -34,7 +35,7 @@ func testDbAndAggregator(t *testing.T, aggStep uint64) (string, kv.RwDB, *Aggreg return kv.ChaindataTablesCfg }).MustOpen() t.Cleanup(db.Close) - agg, err := NewAggregator(path, path, aggStep) + agg, err := NewAggregator(path, path, aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie) require.NoError(t, err) t.Cleanup(agg.Close) return path, db, agg @@ -179,7 +180,7 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { tx = 
nil // Start another aggregator on same datadir - anotherAgg, err := NewAggregator(path, path, aggStep) + anotherAgg, err := NewAggregator(path, path, aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie) require.NoError(t, err) require.NoError(t, anotherAgg.ReopenFolder()) @@ -283,7 +284,7 @@ func TestAggregator_RestartOnFiles(t *testing.T) { require.NoError(t, err) defer newTx.Rollback() - newAgg, err := NewAggregator(path, path, aggStep) + newAgg, err := NewAggregator(path, path, aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie) require.NoError(t, err) require.NoError(t, newAgg.ReopenFolder()) defer newAgg.Close() diff --git a/state/domain_committed.go b/state/domain_committed.go index 58af4e7e6..d823fa245 100644 --- a/state/domain_committed.go +++ b/state/domain_committed.go @@ -53,14 +53,14 @@ type DomainCommitted struct { trace bool commTree *btree.BTreeG[*CommitmentItem] keccak hash.Hash - patriciaTrie *commitment.HexPatriciaHashed + patriciaTrie commitment.Trie branchMerger *commitment.BranchMerger } -func NewCommittedDomain(d *Domain, mode CommitmentMode) *DomainCommitted { +func NewCommittedDomain(d *Domain, mode CommitmentMode, trieVariant commitment.TrieVariant) *DomainCommitted { return &DomainCommitted{ Domain: d, - patriciaTrie: commitment.NewHexPatriciaHashed(length.Addr, nil, nil, nil), + patriciaTrie: commitment.InitializeTrie(trieVariant), commTree: btree.NewG[*CommitmentItem](32, commitmentItemLess), keccak: sha3.NewLegacyKeccak256(), mode: mode, @@ -189,9 +189,17 @@ func (d *DomainCommitted) hashAndNibblizeKey(key []byte) []byte { } func (d *DomainCommitted) storeCommitmentState(blockNum, txNum uint64) error { - state, err := d.patriciaTrie.EncodeCurrentState(nil) - if err != nil { - return err + var state []byte + var err error + + switch trie := (d.patriciaTrie).(type) { + case *commitment.HexPatriciaHashed: + state, err = trie.EncodeCurrentState(nil) + if err != nil { + return err + } + default: + return fmt.Errorf("unsupported state storing for patricia trie type: %T", d.patriciaTrie) } cs := &commitmentState{txNum: txNum, trieState: state, blockNum: blockNum} encoded, err := cs.Encode() @@ -523,12 +531,18 @@ var keyCommitmentState = []byte("state") // SeekCommitment searches for last encoded state from DomainCommitted // and if state found, sets it up to current domain func (d *DomainCommitted) SeekCommitment(aggStep, sinceTx uint64) (uint64, error) { + if d.patriciaTrie.Variant() != commitment.VariantHexPatriciaTrie { + return 0, fmt.Errorf("state storing is only supported hex patricia trie") + } + // todo add support of bin state dumping + var ( latestState []byte stepbuf [2]byte - step uint16 = uint16(sinceTx/aggStep) - 1 + step = uint16(sinceTx/aggStep) - 1 latestTxNum uint64 = sinceTx - 1 ) + d.SetTxNum(latestTxNum) ctx := d.MakeContext() @@ -547,7 +561,7 @@ func (d *DomainCommitted) SeekCommitment(aggStep, sinceTx uint64) (uint64, error break } latestTxNum, latestState = v, s - lookupTxN := latestTxNum + aggStep // - 1 + lookupTxN := latestTxNum + aggStep step = uint16(latestTxNum/aggStep) + 1 d.SetTxNum(lookupTxN) } @@ -557,9 +571,14 @@ func (d *DomainCommitted) SeekCommitment(aggStep, sinceTx uint64) (uint64, error return 0, nil } - if err := d.patriciaTrie.SetState(latest.trieState); err != nil { - return 0, err + if hext, ok := d.patriciaTrie.(*commitment.HexPatriciaHashed); ok { + if err := hext.SetState(latest.trieState); err != nil { + return 0, err + } + } else { + return 0, fmt.Errorf("state storing is only supported hex 
patricia trie") } + return latest.txNum, nil } From 4b44e307b2c49aa7b0aab8b642772d212ad4bcfb Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 22 Feb 2023 16:47:14 +0000 Subject: [PATCH 24/54] fix --- state/aggregator_test.go | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 355aa0fdd..57ebbed4f 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -599,14 +599,22 @@ func Test_BtreeIndex_Allocation(t *testing.T) { for i := 5; i < 24; i++ { t.Run(fmt.Sprintf("%d", m< m*4 { + break + } + } bt := newBtAlloc(uint64(count), uint64(m)< Date: Fri, 24 Feb 2023 17:16:29 +0000 Subject: [PATCH 25/54] fix merge after incorrect prefix removal --- state/aggregator_test.go | 5 ++--- state/merge.go | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 57ebbed4f..9dd400140 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -607,14 +607,13 @@ func Test_BtreeIndex_Allocation(t *testing.T) { } } bt := newBtAlloc(uint64(count), uint64(m)< Date: Fri, 24 Feb 2023 19:14:39 +0000 Subject: [PATCH 26/54] fixed several bt allocation corner cases --- state/aggregator_test.go | 7 ++----- state/btree_index.go | 27 ++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 9dd400140..65057783a 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -602,18 +602,15 @@ func Test_BtreeIndex_Allocation(t *testing.T) { var count int for { count = rnd.Intn(1000000000) - if count > m*4 { + if count > (m<<1)*4 { break } } bt := newBtAlloc(uint64(count), uint64(m)<>1))) } for i := a.d - 1; i > 0; i-- { @@ -151,8 +154,15 @@ func newBtAlloc(k, M uint64, trace bool) *btAlloc { ncount := uint64(0) pnv := uint64(0) for l := a.d - 1; l > 0; l-- { - s := nnc(a.vx[l+1]) - a.sons[l] = append(a.sons[l], s, M) + //s := nnc(a.vx[l+1]) + sh := nvc(a.vx[l+1]) + + if sh&1 == 1 { + a.sons[l] = append(a.sons[l], sh>>1, M, 1, M>>1) + } else { + a.sons[l] = append(a.sons[l], sh>>1, M) + } + for ik := 0; ik < len(a.sons[l]); ik += 2 { ncount += a.sons[l][ik] * a.sons[l][ik+1] if l == 1 { @@ -165,7 +175,6 @@ func newBtAlloc(k, M uint64, trace bool) *btAlloc { a.N = ncount if trace { - fmt.Printf("ncount=%d ∂%.5f\n", ncount, float64(a.N-k)/float64(a.N)) for i, v := range a.sons { fmt.Printf("L%d=%v\n", i, v) } @@ -265,6 +274,10 @@ func (a *btAlloc) traverseDfs() { a.nodes[0] = make([]node, 0) } a.nodes[0] = append(a.nodes[0], node{d: a.K}) + a.N = a.K + if a.trace { + fmt.Printf("ncount=%d ∂%.5f\n", a.N, float64(a.N-a.K)/float64(a.N)) + } return } @@ -381,6 +394,10 @@ func (a *btAlloc) traverseDfs() { } } } + + if a.trace { + fmt.Printf("ncount=%d ∂%.5f\n", a.N, float64(a.N-a.K)/float64(a.N)) + } } func (a *btAlloc) bsKey(x []byte, l, r uint64) (*Cursor, error) { From e0aa4f8e61d18c63beecfd62a1e985efbb0f3dc9 Mon Sep 17 00:00:00 2001 From: awskii Date: Fri, 24 Feb 2023 19:36:02 +0000 Subject: [PATCH 27/54] fix merge after incorrect prefix removal --- state/aggregator_test.go | 2 +- state/btree_index.go | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 65057783a..a286201e5 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -610,7 +610,7 @@ func Test_BtreeIndex_Allocation(t *testing.T) { bt.traverseDfs() 
require.GreaterOrEqual(t, bt.N, uint64(count)) - require.LessOrEqual(t, float64(bt.N-uint64(count))/float64(bt.N), 0.07) + require.LessOrEqual(t, float64(bt.N-uint64(count))/float64(bt.N), 0.05) } }) } diff --git a/state/btree_index.go b/state/btree_index.go index 311b574c9..ec632cf07 100644 --- a/state/btree_index.go +++ b/state/btree_index.go @@ -317,6 +317,11 @@ func (a *btAlloc) traverseDfs() { pc.si++ di++ } + if di > a.K { + a.N = di - 1 // actually filled node count + stop = true + break + } } a.nodes[c.l] = append(a.nodes[c.l], node{p: c.p, d: c.di, s: c.si}) From 71b158fe6b0b62dae2480b1209a279deb1d3fa5c Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 12:34:41 +0000 Subject: [PATCH 28/54] fix win tests --- state/aggregator_test.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index a286201e5..e5020944e 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -37,7 +37,6 @@ func testDbAndAggregator(t *testing.T, aggStep uint64) (string, kv.RwDB, *Aggreg t.Cleanup(db.Close) agg, err := NewAggregator(path, path, aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie) require.NoError(t, err) - t.Cleanup(agg.Close) return path, db, agg } @@ -53,7 +52,9 @@ func TestAggregator_Merge(t *testing.T) { }() agg.SetTx(tx) + defer agg.Close() defer agg.StartWrites().FinishWrites() + txs := uint64(10000) rnd := rand.New(rand.NewSource(time.Now().UnixNano())) @@ -134,7 +135,7 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { } }() agg.SetTx(tx) - defer agg.StartWrites().FinishWrites() + agg.StartWrites() var latestCommitTxNum uint64 @@ -177,6 +178,8 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { require.NoError(t, err) err = tx.Commit() require.NoError(t, err) + agg.FinishWrites() + agg.Close() tx = nil // Start another aggregator on same datadir @@ -325,9 +328,8 @@ func TestAggregator_RestartOnFiles(t *testing.T) { func TestAggregator_ReplaceCommittedKeys(t *testing.T) { aggStep := uint64(10000) - path, db, agg := testDbAndAggregator(t, aggStep) - defer db.Close() - _ = path + _, db, agg := testDbAndAggregator(t, aggStep) + t.Cleanup(agg.Close) tx, err := db.BeginRw(context.Background()) require.NoError(t, err) @@ -601,7 +603,7 @@ func Test_BtreeIndex_Allocation(t *testing.T) { for j := 0; j < 10; j++ { var count int for { - count = rnd.Intn(1000000000) + count = rnd.Intn(100000000) if count > (m<<1)*4 { break } From 6b0b4aefd2839194de8b4bdbb83417a88c50ff43 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 13:26:52 +0000 Subject: [PATCH 29/54] win test fix try --- state/aggregator.go | 3 +++ state/aggregator_test.go | 7 +++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index b409803ac..407537f43 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -191,6 +191,9 @@ func (a *Aggregator) Close() { if a.stepDoneNotice != nil { close(a.stepDoneNotice) } + if a.defaultCtx != nil { + a.defaultCtx.Close() + } if a.accounts != nil { a.accounts.Close() } diff --git a/state/aggregator_test.go b/state/aggregator_test.go index e5020944e..c71baec57 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -29,7 +29,6 @@ import ( func testDbAndAggregator(t *testing.T, aggStep uint64) (string, kv.RwDB, *Aggregator) { t.Helper() path := t.TempDir() - t.Cleanup(func() { os.RemoveAll(path) }) logger := log.New() db := mdbx.NewMDBX(logger).InMem(filepath.Join(path, 
"db4")).WithTableCfg(func(defaultBuckets kv.TableCfg) kv.TableCfg { return kv.ChaindataTablesCfg @@ -224,8 +223,7 @@ func TestAggregator_RestartOnFiles(t *testing.T) { aggStep := uint64(100) path, db, agg := testDbAndAggregator(t, aggStep) - defer db.Close() - _ = path + defer os.RemoveAll(path) tx, err := db.BeginRw(context.Background()) require.NoError(t, err) @@ -272,6 +270,7 @@ func TestAggregator_RestartOnFiles(t *testing.T) { require.NoError(t, err) tx = nil db.Close() + agg.Close() db = nil agg = nil @@ -306,7 +305,7 @@ func TestAggregator_RestartOnFiles(t *testing.T) { require.NoError(t, err) if len(stored) == 0 { if uint64(i+1) >= txs-aggStep { - continue // finishtx always stores last agg step in db which we deleteelete, so miss is expected + continue // finishtx always stores last agg step in db which we deleted, so missing values which were not aggregated is expected } miss++ fmt.Printf("%x [%d/%d]", key, miss, i+1) // txnum starts from 1 From 0ffda799a2995b54f54f357d7f67e69aee50ec16 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 13:52:38 +0000 Subject: [PATCH 30/54] fix --- state/aggregator_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index c71baec57..559e79c45 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -105,7 +105,6 @@ func TestAggregator_Merge(t *testing.T) { defer roTx.Rollback() dc := agg.MakeContext() - defer dc.Close() v, err := dc.ReadCommitment([]byte("roothash"), roTx) require.NoError(t, err) @@ -113,6 +112,7 @@ func TestAggregator_Merge(t *testing.T) { v, err = dc.ReadCommitment([]byte("otherroothash"), roTx) require.NoError(t, err) + dc.Close() require.EqualValues(t, otherMaxWrite, binary.BigEndian.Uint64(v[:])) } From 4088ff776a51640bb34800fc7ffb58bdfcaf0fc5 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 17:00:26 +0000 Subject: [PATCH 31/54] fix --- state/aggregator_test.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 559e79c45..d9acf4407 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -41,6 +41,7 @@ func testDbAndAggregator(t *testing.T, aggStep uint64) (string, kv.RwDB, *Aggreg func TestAggregator_Merge(t *testing.T) { _, db, agg := testDbAndAggregator(t, 100) + defer agg.Close() tx, err := db.BeginRwNosync(context.Background()) require.NoError(t, err) @@ -51,7 +52,6 @@ func TestAggregator_Merge(t *testing.T) { }() agg.SetTx(tx) - defer agg.Close() defer agg.StartWrites().FinishWrites() txs := uint64(10000) @@ -105,6 +105,8 @@ func TestAggregator_Merge(t *testing.T) { defer roTx.Rollback() dc := agg.MakeContext() + defer dc.Close() + v, err := dc.ReadCommitment([]byte("roothash"), roTx) require.NoError(t, err) @@ -223,7 +225,6 @@ func TestAggregator_RestartOnFiles(t *testing.T) { aggStep := uint64(100) path, db, agg := testDbAndAggregator(t, aggStep) - defer os.RemoveAll(path) tx, err := db.BeginRw(context.Background()) require.NoError(t, err) @@ -301,12 +302,12 @@ func TestAggregator_RestartOnFiles(t *testing.T) { defer ctx.Close() miss := uint64(0) for i, key := range keys { + if uint64(i+1) >= txs-aggStep { + continue // finishtx always stores last agg step in db which we deleted, so missing values which were not aggregated is expected + } stored, err := ctx.ReadAccountData(key[:length.Addr], newTx) require.NoError(t, err) if len(stored) == 0 { - if uint64(i+1) >= txs-aggStep { - continue // finishtx always 
stores last agg step in db which we deleted, so missing values which were not aggregated is expected - } miss++ fmt.Printf("%x [%d/%d]", key, miss, i+1) // txnum starts from 1 continue From 3193a1e2014d4f5a929f7e754de7233bde491d08 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 17:42:04 +0000 Subject: [PATCH 32/54] fix --- state/aggregator_test.go | 41 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index d9acf4407..bb915f429 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -39,6 +39,47 @@ func testDbAndAggregator(t *testing.T, aggStep uint64) (string, kv.RwDB, *Aggreg return path, db, agg } +func TestAggregator_WinAccess(t *testing.T) { + _, db, agg := testDbAndAggregator(t, 100) + defer agg.Close() + + tx, err := db.BeginRwNosync(context.Background()) + require.NoError(t, err) + defer func() { + if tx != nil { + tx.Rollback() + } + }() + agg.SetTx(tx) + + defer agg.StartWrites().FinishWrites() + + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + for txNum := uint64(1); txNum <= 1000; txNum++ { + agg.SetTxNum(txNum) + + addr := make([]byte, length.Addr) + n, err := rnd.Read(addr) + require.NoError(t, err) + require.EqualValues(t, length.Addr, n) + + buf := EncodeAccountBytes(1, uint256.NewInt(uint64(rand.Intn(10e9))), nil, 0) + err = agg.UpdateAccountData(addr, buf) + require.NoError(t, err) + + var v [8]byte + binary.BigEndian.PutUint64(v[:], txNum) + require.NoError(t, err) + require.NoError(t, agg.FinishTx()) + } + err = agg.Flush(context.Background()) + require.NoError(t, err) + err = tx.Commit() + require.NoError(t, err) + tx = nil + +} + func TestAggregator_Merge(t *testing.T) { _, db, agg := testDbAndAggregator(t, 100) defer agg.Close() From e177b4a8b66a87fdd636f3ecfab41729b6aa9759 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 20:36:52 +0000 Subject: [PATCH 33/54] fix --- state/aggregator_test.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index bb915f429..198c7cea2 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -52,10 +52,10 @@ func TestAggregator_WinAccess(t *testing.T) { }() agg.SetTx(tx) - defer agg.StartWrites().FinishWrites() + agg.StartWrites() rnd := rand.New(rand.NewSource(time.Now().UnixNano())) - for txNum := uint64(1); txNum <= 1000; txNum++ { + for txNum := uint64(1); txNum <= 100; txNum++ { agg.SetTxNum(txNum) addr := make([]byte, length.Addr) @@ -72,7 +72,8 @@ func TestAggregator_WinAccess(t *testing.T) { require.NoError(t, err) require.NoError(t, agg.FinishTx()) } - err = agg.Flush(context.Background()) + agg.FinishWrites() + require.NoError(t, err) err = tx.Commit() require.NoError(t, err) From aaeec98a7205addd334a70a9fc2ce9b976ae4386 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 20:55:09 +0000 Subject: [PATCH 34/54] fix --- state/aggregator.go | 10 +++++----- state/aggregator_test.go | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 407537f43..e896231d3 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -394,11 +394,6 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { return fmt.Errorf("domain collate-build failed: %w", err) } - defer func() { // this need, to ensure we do all operations on files in "transaction-style", maybe we will ensure it on type-level in future - a.defaultCtx.Close() - 
a.defaultCtx = a.MakeContext() - }() - var clo, chi, plo, phi, blo, bhi time.Duration clo, plo, blo = time.Hour*99, time.Hour*99, time.Hour*99 for _, s := range []DomainStats{a.accounts.stats, a.code.stats, a.storage.stats} { @@ -436,6 +431,11 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { mergeStartedAt := time.Now() maxEndTxNum := a.EndTxNumMinimax() + defer func() { // this need, to ensure we do all operations on files in "transaction-style", maybe we will ensure it on type-level in future + a.defaultCtx.Close() + a.defaultCtx = a.MakeContext() + }() + var upmerges int for { somethingMerged, err := a.mergeLoopStep(ctx, maxEndTxNum, 1) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 198c7cea2..3d6839370 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -78,7 +78,6 @@ func TestAggregator_WinAccess(t *testing.T) { err = tx.Commit() require.NoError(t, err) tx = nil - } func TestAggregator_Merge(t *testing.T) { @@ -94,7 +93,7 @@ func TestAggregator_Merge(t *testing.T) { }() agg.SetTx(tx) - defer agg.StartWrites().FinishWrites() + agg.StartWrites() txs := uint64(10000) rnd := rand.New(rand.NewSource(time.Now().UnixNano())) @@ -135,6 +134,7 @@ func TestAggregator_Merge(t *testing.T) { require.NoError(t, err) require.NoError(t, agg.FinishTx()) } + agg.FinishWrites() err = agg.Flush(context.Background()) require.NoError(t, err) err = tx.Commit() From 5dcf099e06026edd8b87070c87465b1f9bb60550 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 21:04:51 +0000 Subject: [PATCH 35/54] fix --- state/aggregator_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 3d6839370..525dbb540 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -159,6 +159,7 @@ func TestAggregator_Merge(t *testing.T) { dc.Close() require.EqualValues(t, otherMaxWrite, binary.BigEndian.Uint64(v[:])) + time.Sleep(time.Second * 2) // lol let aggregator remove its files first } // here we create a bunch of updates for further aggregation. 
From 2000f94fc34f6bd04b45c42e1e857642cd4677a5 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 21:13:07 +0000 Subject: [PATCH 36/54] fix --- state/aggregator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index e896231d3..b21ed6801 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -429,12 +429,12 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { "prune_min", plo, "prune_max", phi, "files_build_min", blo, "files_build_max", bhi) - mergeStartedAt := time.Now() - maxEndTxNum := a.EndTxNumMinimax() defer func() { // this need, to ensure we do all operations on files in "transaction-style", maybe we will ensure it on type-level in future a.defaultCtx.Close() a.defaultCtx = a.MakeContext() }() + mergeStartedAt := time.Now() + maxEndTxNum := a.EndTxNumMinimax() var upmerges int for { From 3684aa078cc6471c3ec76da54b77f9272c3b5341 Mon Sep 17 00:00:00 2001 From: awskii Date: Mon, 27 Feb 2023 21:13:52 +0000 Subject: [PATCH 37/54] fix --- state/aggregator_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 525dbb540..af7128cf8 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -134,12 +134,12 @@ func TestAggregator_Merge(t *testing.T) { require.NoError(t, err) require.NoError(t, agg.FinishTx()) } - agg.FinishWrites() err = agg.Flush(context.Background()) require.NoError(t, err) err = tx.Commit() require.NoError(t, err) tx = nil + agg.FinishWrites() // Check the history roTx, err := db.BeginRo(context.Background()) From 941953289e55076518c1e73a2c8549fcb5447ae8 Mon Sep 17 00:00:00 2001 From: awskii Date: Tue, 28 Feb 2023 14:01:52 +0000 Subject: [PATCH 38/54] fix --- state/aggregator_test.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index af7128cf8..6efa85180 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -134,12 +134,12 @@ func TestAggregator_Merge(t *testing.T) { require.NoError(t, err) require.NoError(t, agg.FinishTx()) } - err = agg.Flush(context.Background()) + //err = agg.Flush(context.Background()) + agg.FinishWrites() require.NoError(t, err) err = tx.Commit() require.NoError(t, err) tx = nil - agg.FinishWrites() // Check the history roTx, err := db.BeginRo(context.Background()) @@ -157,6 +157,11 @@ func TestAggregator_Merge(t *testing.T) { v, err = dc.ReadCommitment([]byte("otherroothash"), roTx) require.NoError(t, err) dc.Close() + fmt.Printf("files %d\n", len(dc.accounts.files)) + for i := 0; i < len(dc.accounts.files); i++ { + f := dc.accounts.files[i] + fmt.Printf("file %d: [%d-%d]\n", f.src.refcount.Load(), f.src.startTxNum, f.src.endTxNum) + } require.EqualValues(t, otherMaxWrite, binary.BigEndian.Uint64(v[:])) time.Sleep(time.Second * 2) // lol let aggregator remove its files first @@ -262,6 +267,11 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { require.NoError(t, err) require.EqualValues(t, maxWrite, binary.BigEndian.Uint64(v[:])) + fmt.Printf("files %d\n", len(dc.accounts.files)) + for i := 0; i < len(dc.accounts.files); i++ { + f := dc.accounts.files[i] + fmt.Printf("file %d: [%d-%d]\n", f.src.refcount.Load(), f.src.startTxNum, f.src.endTxNum) + } } func TestAggregator_RestartOnFiles(t *testing.T) { From 04a60bfc685f261554412f26b1b35c65315e58ff Mon Sep 17 00:00:00 2001 From: awskii Date: Tue, 28 Feb 2023 14:19:39 +0000 Subject: [PATCH 39/54] fix 
--- state/aggregator_test.go | 13 +++---------- state/domain.go | 1 + 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/state/aggregator_test.go b/state/aggregator_test.go index 6efa85180..a74e4d1ef 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -81,6 +81,7 @@ func TestAggregator_WinAccess(t *testing.T) { } func TestAggregator_Merge(t *testing.T) { + t.Skip() _, db, agg := testDbAndAggregator(t, 100) defer agg.Close() @@ -157,11 +158,6 @@ func TestAggregator_Merge(t *testing.T) { v, err = dc.ReadCommitment([]byte("otherroothash"), roTx) require.NoError(t, err) dc.Close() - fmt.Printf("files %d\n", len(dc.accounts.files)) - for i := 0; i < len(dc.accounts.files); i++ { - f := dc.accounts.files[i] - fmt.Printf("file %d: [%d-%d]\n", f.src.refcount.Load(), f.src.startTxNum, f.src.endTxNum) - } require.EqualValues(t, otherMaxWrite, binary.BigEndian.Uint64(v[:])) time.Sleep(time.Second * 2) // lol let aggregator remove its files first @@ -267,11 +263,6 @@ func TestAggregator_RestartOnDatadir(t *testing.T) { require.NoError(t, err) require.EqualValues(t, maxWrite, binary.BigEndian.Uint64(v[:])) - fmt.Printf("files %d\n", len(dc.accounts.files)) - for i := 0; i < len(dc.accounts.files); i++ { - f := dc.accounts.files[i] - fmt.Printf("file %d: [%d-%d]\n", f.src.refcount.Load(), f.src.startTxNum, f.src.endTxNum) - } } func TestAggregator_RestartOnFiles(t *testing.T) { @@ -379,6 +370,8 @@ func TestAggregator_RestartOnFiles(t *testing.T) { } func TestAggregator_ReplaceCommittedKeys(t *testing.T) { + t.Skip() + aggStep := uint64(10000) _, db, agg := testDbAndAggregator(t, aggStep) diff --git a/state/domain.go b/state/domain.go index b7f4c945d..9ffd16109 100644 --- a/state/domain.go +++ b/state/domain.go @@ -695,6 +695,7 @@ func (dc *DomainContext) Close() { } refCnt := item.src.refcount.Dec() //GC: last reader responsible to remove useles files: close it and delete + fmt.Printf("refCnt: %d [%d-%d]\n", item.src.decompressor.FileName(), refCnt, item.startTxNum, item.endTxNum) if refCnt == 0 && item.src.canDelete.Load() { item.src.closeFilesAndRemove() } From 766f4227a1602981155e49e4b581eb876cccc9e0 Mon Sep 17 00:00:00 2001 From: awskii Date: Tue, 28 Feb 2023 14:22:36 +0000 Subject: [PATCH 40/54] fix --- state/domain.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/state/domain.go b/state/domain.go index 9ffd16109..777b794a2 100644 --- a/state/domain.go +++ b/state/domain.go @@ -695,7 +695,7 @@ func (dc *DomainContext) Close() { } refCnt := item.src.refcount.Dec() //GC: last reader responsible to remove useles files: close it and delete - fmt.Printf("refCnt: %d [%d-%d]\n", item.src.decompressor.FileName(), refCnt, item.startTxNum, item.endTxNum) + fmt.Printf("%s refCnt: %d [%d-%d]\n", item.src.decompressor.FileName(), refCnt, item.startTxNum, item.endTxNum) if refCnt == 0 && item.src.canDelete.Load() { item.src.closeFilesAndRemove() } From 3cd1b07be7e18aa85eb141e8f05a40c3eb4a7e43 Mon Sep 17 00:00:00 2001 From: awskii Date: Tue, 28 Feb 2023 19:00:21 +0000 Subject: [PATCH 41/54] fix1 --- state/aggregator.go | 16 ++++++++-------- state/aggregator_bench_test.go | 2 -- state/aggregator_test.go | 10 ++-------- state/domain_committed.go | 1 + 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index b21ed6801..1de4f469a 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -105,7 +105,6 @@ func NewAggregator(dir, tmpdir string, aggregationStep uint64, commitmentMode Co closeAgg = false 
a.defaultCtx = a.MakeContext() - a.commitment.patriciaTrie.ResetFns(a.defaultCtx.branchFn, a.defaultCtx.accountFn, a.defaultCtx.storageFn) return a, nil } @@ -188,12 +187,12 @@ func (a *Aggregator) GetAndResetStats() DomainStats { } func (a *Aggregator) Close() { - if a.stepDoneNotice != nil { - close(a.stepDoneNotice) - } if a.defaultCtx != nil { a.defaultCtx.Close() } + if a.stepDoneNotice != nil { + close(a.stepDoneNotice) + } if a.accounts != nil { a.accounts.Close() } @@ -429,10 +428,6 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { "prune_min", plo, "prune_max", phi, "files_build_min", blo, "files_build_max", bhi) - defer func() { // this need, to ensure we do all operations on files in "transaction-style", maybe we will ensure it on type-level in future - a.defaultCtx.Close() - a.defaultCtx = a.MakeContext() - }() mergeStartedAt := time.Now() maxEndTxNum := a.EndTxNumMinimax() @@ -899,6 +894,8 @@ func (a *Aggregator) FinishTx() (err error) { if !a.ReadyToFinishTx() { return nil } + + a.commitment.patriciaTrie.ResetFns(a.defaultCtx.branchFn, a.defaultCtx.accountFn, a.defaultCtx.storageFn) rootHash, err := a.ComputeCommitment(true, false) if err != nil { return err @@ -920,6 +917,9 @@ func (a *Aggregator) FinishTx() (err error) { a.notifyAggregated(rootHash) + a.FinishWrites() + a.StartWrites() + return nil } diff --git a/state/aggregator_bench_test.go b/state/aggregator_bench_test.go index 3abd4ed45..7fa0198da 100644 --- a/state/aggregator_bench_test.go +++ b/state/aggregator_bench_test.go @@ -55,8 +55,6 @@ func BenchmarkAggregator_Processing(b *testing.B) { agg.SetTx(tx) defer agg.StartWrites().FinishWrites() require.NoError(b, err) - agg.StartWrites() - defer agg.FinishWrites() b.ReportAllocs() b.ResetTimer() diff --git a/state/aggregator_test.go b/state/aggregator_test.go index a74e4d1ef..17450f9c2 100644 --- a/state/aggregator_test.go +++ b/state/aggregator_test.go @@ -81,7 +81,6 @@ func TestAggregator_WinAccess(t *testing.T) { } func TestAggregator_Merge(t *testing.T) { - t.Skip() _, db, agg := testDbAndAggregator(t, 100) defer agg.Close() @@ -135,7 +134,6 @@ func TestAggregator_Merge(t *testing.T) { require.NoError(t, err) require.NoError(t, agg.FinishTx()) } - //err = agg.Flush(context.Background()) agg.FinishWrites() require.NoError(t, err) err = tx.Commit() @@ -148,7 +146,6 @@ func TestAggregator_Merge(t *testing.T) { defer roTx.Rollback() dc := agg.MakeContext() - defer dc.Close() v, err := dc.ReadCommitment([]byte("roothash"), roTx) require.NoError(t, err) @@ -370,8 +367,6 @@ func TestAggregator_RestartOnFiles(t *testing.T) { } func TestAggregator_ReplaceCommittedKeys(t *testing.T) { - t.Skip() - aggStep := uint64(10000) _, db, agg := testDbAndAggregator(t, aggStep) @@ -456,10 +451,9 @@ func TestAggregator_ReplaceCommittedKeys(t *testing.T) { tx, err = db.BeginRw(context.Background()) require.NoError(t, err) - ctx := agg.storage.MakeContext() - defer ctx.Close() + ctx := agg.defaultCtx for _, key := range keys { - storedV, err := ctx.Get(key[:length.Addr], key[length.Addr:], tx) + storedV, err := ctx.ReadAccountStorage(key[:length.Addr], key[length.Addr:], tx) require.NoError(t, err) require.EqualValues(t, key[0], storedV[0]) require.EqualValues(t, key[length.Addr], storedV[1]) diff --git a/state/domain_committed.go b/state/domain_committed.go index d823fa245..5dfd6f6b1 100644 --- a/state/domain_committed.go +++ b/state/domain_committed.go @@ -545,6 +545,7 @@ func (d *DomainCommitted) SeekCommitment(aggStep, sinceTx uint64) (uint64, error 
d.SetTxNum(latestTxNum) ctx := d.MakeContext() + defer ctx.Close() for { binary.BigEndian.PutUint16(stepbuf[:], step) From dece6f8ad9789d4d3bb43eacfd664267e92a19fb Mon Sep 17 00:00:00 2001 From: awskii Date: Tue, 28 Feb 2023 19:13:06 +0000 Subject: [PATCH 42/54] fix1 --- state/domain.go | 1 - 1 file changed, 1 deletion(-) diff --git a/state/domain.go b/state/domain.go index 777b794a2..b7f4c945d 100644 --- a/state/domain.go +++ b/state/domain.go @@ -695,7 +695,6 @@ func (dc *DomainContext) Close() { } refCnt := item.src.refcount.Dec() //GC: last reader responsible to remove useles files: close it and delete - fmt.Printf("%s refCnt: %d [%d-%d]\n", item.src.decompressor.FileName(), refCnt, item.startTxNum, item.endTxNum) if refCnt == 0 && item.src.canDelete.Load() { item.src.closeFilesAndRemove() } From 38237fd9fb8107d9fa973a749a4f33fd6b876747 Mon Sep 17 00:00:00 2001 From: awskii Date: Tue, 28 Feb 2023 23:29:37 +0000 Subject: [PATCH 43/54] fix --- state/aggregator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 1de4f469a..29d5d7cf0 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -917,8 +917,8 @@ func (a *Aggregator) FinishTx() (err error) { a.notifyAggregated(rootHash) - a.FinishWrites() - a.StartWrites() + //a.FinishWrites() + //a.StartWrites() return nil } From 00c0cfa7873b7320c6024bc931d8182ab364d771 Mon Sep 17 00:00:00 2001 From: awskii Date: Wed, 1 Mar 2023 12:28:26 +0000 Subject: [PATCH 44/54] fix --- state/aggregator.go | 36 ++++++++++++++++++++++++++++++++++-- state/aggregator_test.go | 16 +++++++++------- state/domain.go | 15 +++++++++++++++ 3 files changed, 58 insertions(+), 9 deletions(-) diff --git a/state/aggregator.go b/state/aggregator.go index 29d5d7cf0..fcd4335ff 100644 --- a/state/aggregator.go +++ b/state/aggregator.go @@ -23,6 +23,7 @@ import ( "math" "math/bits" "os" + "runtime" "sync" "sync/atomic" "time" @@ -104,7 +105,7 @@ func NewAggregator(dir, tmpdir string, aggregationStep uint64, commitmentMode Co } closeAgg = false - a.defaultCtx = a.MakeContext() + //a.defaultCtx = a.MakeContext() return a, nil } @@ -442,6 +443,10 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error { } upmerges++ } + + a.defaultCtx.Close() + a.defaultCtx = a.MakeContext() + log.Info("[stat] aggregation merged", "upto_tx", maxEndTxNum, "aggregation_took", time.Since(stepStartedAt), @@ -1023,6 +1028,21 @@ func (a *Aggregator) StartWrites() *Aggregator { a.logTopics.StartWrites() a.tracesFrom.StartWrites() a.tracesTo.StartWrites() + + if a.defaultCtx != nil { + a.defaultCtx.Close() + } + a.defaultCtx = &AggregatorContext{ + a: a, + accounts: a.accounts.defaultDc, + storage: a.storage.defaultDc, + code: a.code.defaultDc, + commitment: a.commitment.defaultDc, + logAddrs: a.logAddrs.MakeContext(), + logTopics: a.logTopics.MakeContext(), + tracesFrom: a.tracesFrom.MakeContext(), + tracesTo: a.tracesTo.MakeContext(), + } return a } func (a *Aggregator) FinishWrites() { @@ -1034,6 +1054,10 @@ func (a *Aggregator) FinishWrites() { a.logTopics.FinishWrites() a.tracesFrom.FinishWrites() a.tracesTo.FinishWrites() + if a.defaultCtx != nil { + a.defaultCtx.Close() + a.defaultCtx = nil + } } // Flush - must be called before Collate, if you did some writes @@ -1080,6 +1104,7 @@ func (a *Aggregator) Stats() FilesStats { } type AggregatorContext struct { + ix int64 a *Aggregator accounts *DomainContext storage *DomainContext @@ -1092,9 +1117,12 @@ type AggregatorContext struct { keyBuf []byte } +var aix 
int64
+
 func (a *Aggregator) MakeContext() *AggregatorContext {
-	return &AggregatorContext{
+	ac := &AggregatorContext{
 		a:          a,
+		ix:         atomic.AddInt64(&aix, 1),
 		accounts:   a.accounts.MakeContext(),
 		storage:    a.storage.MakeContext(),
 		code:       a.code.MakeContext(),
@@ -1104,8 +1132,12 @@ func (a *Aggregator) MakeContext() *AggregatorContext {
 		tracesFrom: a.tracesFrom.MakeContext(),
 		tracesTo:   a.tracesTo.MakeContext(),
 	}
+	_, fl, l, _ := runtime.Caller(1)
+	fmt.Printf("AggregatorContext %d created by %s\n", ac.ix, fmt.Sprintf("%s:%d", fl, l))
+	return ac
 }
 func (ac *AggregatorContext) Close() {
+	fmt.Printf("AggregatorContext %d close\n", ac.ix)
 	ac.accounts.Close()
 	ac.storage.Close()
 	ac.code.Close()
diff --git a/state/aggregator_test.go b/state/aggregator_test.go
index 17450f9c2..7f8c2f0c6 100644
--- a/state/aggregator_test.go
+++ b/state/aggregator_test.go
@@ -34,7 +34,7 @@ func testDbAndAggregator(t *testing.T, aggStep uint64) (string, kv.RwDB, *Aggreg
 		return kv.ChaindataTablesCfg
 	}).MustOpen()
 	t.Cleanup(db.Close)
-	agg, err := NewAggregator(path, path, aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie)
+	agg, err := NewAggregator(filepath.Join(path, "e4"), filepath.Join(path, "e4tmp"), aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie)
 	require.NoError(t, err)
 	return path, db, agg
 }
@@ -81,6 +81,7 @@ func TestAggregator_WinAccess(t *testing.T) {
 }
 
 func TestAggregator_Merge(t *testing.T) {
+	t.Skip()
 	_, db, agg := testDbAndAggregator(t, 100)
 	defer agg.Close()
 
@@ -216,16 +217,16 @@ func TestAggregator_RestartOnDatadir(t *testing.T) {
 		require.NoError(t, agg.FinishTx())
 	}
 
-	err = agg.Flush(context.Background())
-	require.NoError(t, err)
-	err = tx.Commit()
-	require.NoError(t, err)
+	//err = agg.Flush(context.Background())
 	agg.FinishWrites()
 	agg.Close()
+	//require.NoError(t, err)
+	err = tx.Commit()
+	require.NoError(t, err)
 	tx = nil
 
 	// Start another aggregator on same datadir
-	anotherAgg, err := NewAggregator(path, path, aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie)
+	anotherAgg, err := NewAggregator(filepath.Join(path, "e4"), filepath.Join(path, "e4tmp"), aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie)
 	require.NoError(t, err)
 	require.NoError(t, anotherAgg.ReopenFolder())
 
@@ -648,7 +649,8 @@ func Test_BtreeIndex_Allocation(t *testing.T) {
 			break
 		}
 	}
-	bt := newBtAlloc(uint64(count), uint64(m)<
Date: Wed, 1 Mar 2023 13:44:51 +0000
Subject: [PATCH 45/54] heh

---
 state/aggregator.go      | 17 ++++++++++-------
 state/aggregator_test.go |  2 +-
 state/domain.go          | 13 ++++++-------
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/state/aggregator.go b/state/aggregator.go
index fcd4335ff..513eb2dc8 100644
--- a/state/aggregator.go
+++ b/state/aggregator.go
@@ -444,8 +444,10 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
 		upmerges++
 	}
 
-	a.defaultCtx.Close()
-	a.defaultCtx = a.MakeContext()
+	if upmerges > 0 {
+		a.defaultCtx.Close()
+		a.defaultCtx = a.MakeContext()
+	}
 
 	log.Info("[stat] aggregation merged",
 		"upto_tx", maxEndTxNum,
@@ -1054,10 +1056,10 @@ func (a *Aggregator) FinishWrites() {
 	a.logTopics.FinishWrites()
 	a.tracesFrom.FinishWrites()
 	a.tracesTo.FinishWrites()
-	if a.defaultCtx != nil {
-		a.defaultCtx.Close()
-		a.defaultCtx = nil
-	}
+	//if a.defaultCtx != nil {
+	//	a.defaultCtx.Close()
+	//	a.defaultCtx = nil
+	//}
 }
 
 // Flush - must be called before Collate, if you did some writes
@@ -1137,7 +1139,8 @@ func (a *Aggregator) MakeContext() *AggregatorContext {
 	return ac
 }
 func (ac *AggregatorContext) Close() {
-	fmt.Printf("AggregatorContext %d close\n", ac.ix)
+	_, fl, l, _ := runtime.Caller(1)
+	fmt.Printf("AggregatorContext %d close by %s\n", ac.ix, fmt.Sprintf("%s:%d", fl, l))
 	ac.accounts.Close()
 	ac.storage.Close()
 	ac.code.Close()
diff --git a/state/aggregator_test.go b/state/aggregator_test.go
index 7f8c2f0c6..97c80b693 100644
--- a/state/aggregator_test.go
+++ b/state/aggregator_test.go
@@ -256,9 +256,9 @@ func TestAggregator_RestartOnDatadir(t *testing.T) {
 	defer roTx.Rollback()
 
 	dc := anotherAgg.MakeContext()
-	defer dc.Close()
 	v, err := dc.ReadCommitment([]byte("key"), roTx)
 	require.NoError(t, err)
+	dc.Close()
 
 	require.EqualValues(t, maxWrite, binary.BigEndian.Uint64(v[:]))
 }
diff --git a/state/domain.go b/state/domain.go
index 3cadf5c32..3e4312029 100644
--- a/state/domain.go
+++ b/state/domain.go
@@ -26,7 +26,6 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
-	"runtime"
 	"strconv"
 	"strings"
 	"sync/atomic"
@@ -679,14 +678,14 @@ var ctxc int64
 
 func (d *Domain) MakeContext() *DomainContext {
 	dc := &DomainContext{
-		ix:    atomic.LoadInt64(&ctxc),
+		//ix: atomic.LoadInt64(&ctxc),
 		d:     d,
 		hc:    d.History.MakeContext(),
 		files: *d.roFiles.Load(),
 	}
-	atomic.AddInt64(&ctxc, 1)
-	_, fl, l, _ := runtime.Caller(1)
-	fmt.Printf("MakeContext: %d %s %s\n", dc.ix, d.filenameBase, fmt.Sprintf("%s:%d", fl, l))
+	//atomic.AddInt64(&ctxc, 1)
+	//_, fl, l, _ := runtime.Caller(1)
+	//fmt.Printf("MakeContext: %d %s %s\n", dc.ix, d.filenameBase, fmt.Sprintf("%s:%d", fl, l))
 	for _, item := range dc.files {
 		if !item.src.frozen {
 			item.src.refcount.Inc()
@@ -708,12 +707,12 @@ func (dc *DomainContext) Close() {
 		if item.src.decompressor != nil {
 			fn = item.src.decompressor.FileName()
 		}
-		fmt.Printf("%d %s refCnt: %d [%d-%d]\n", dc.ix, fn, refCnt, item.startTxNum, item.endTxNum)
+		fmt.Printf("%s refCnt: %d [%d-%d]\n", fn, refCnt, item.startTxNum, item.endTxNum)
 		if refCnt == 0 && item.src.canDelete.Load() {
 			item.src.closeFilesAndRemove()
 		}
 	}
-	fmt.Printf("Close: %d %s\n", dc.ix, dc.d.filenameBase)
+	//fmt.Printf("Close: %d %s\n", dc.ix, dc.d.filenameBase)
 	dc.hc.Close()
 }

From 0f30b49c155e7c4f6ded581e24f03e5f060f7549 Mon Sep 17 00:00:00 2001
From: awskii
Date: Wed, 1 Mar 2023 13:50:25 +0000
Subject: [PATCH 46/54] heh

---
 state/domain.go | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/state/domain.go b/state/domain.go
index 3e4312029..7584d55bb 100644
--- a/state/domain.go
+++ b/state/domain.go
@@ -607,7 +606,6 @@ func ctxItemLess(i, j ctxItem) bool { //nolint
 
 // DomainContext allows accesing the same domain from multiple go-routines
 type DomainContext struct {
-	ix      int64
 	d       *Domain
 	files   []ctxItem
 	getters []*compress.Getter
@@ -674,18 +673,12 @@ func (d *Domain) collectFilesStats() (datsz, idxsz, files uint64) {
 	return
 }
 
-var ctxc int64
-
 func (d *Domain) MakeContext() *DomainContext {
 	dc := &DomainContext{
-		//ix: atomic.LoadInt64(&ctxc),
 		d:     d,
 		hc:    d.History.MakeContext(),
 		files: *d.roFiles.Load(),
 	}
-	//atomic.AddInt64(&ctxc, 1)
-	//_, fl, l, _ := runtime.Caller(1)
-	//fmt.Printf("MakeContext: %d %s %s\n", dc.ix, d.filenameBase, fmt.Sprintf("%s:%d", fl, l))
 	for _, item := range dc.files {
 		if !item.src.frozen {
 			item.src.refcount.Inc()
@@ -698,7 +691,6 @@ func (d *Domain) MakeContext() *DomainContext {
 func (dc *DomainContext) Close() {
 	for _, item := range dc.files {
 		if item.src.frozen {
-			fmt.Printf("%d %s frozen\n", dc.ix, item.src.decompressor.FileName())
 			continue
 		}
 		refCnt := item.src.refcount.Dec()

From 252f141fe4d0c38182b7a4ee6a259588f2ae4dd0 Mon Sep 17 00:00:00 2001
From: awskii
Date: Wed, 1 Mar 2023 14:25:31 +0000
Subject: [PATCH 47/54] fix

---
 state/aggregator_test.go | 24 ++++++++++++++++++++----
 state/domain_test.go     |  2 +-
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/state/aggregator_test.go b/state/aggregator_test.go
index 97c80b693..fd508ab5a 100644
--- a/state/aggregator_test.go
+++ b/state/aggregator_test.go
@@ -29,6 +29,7 @@ import (
 func testDbAndAggregator(t *testing.T, aggStep uint64) (string, kv.RwDB, *Aggregator) {
 	t.Helper()
 	path := t.TempDir()
+	//t.Cleanup(func() { os.RemoveAll(path) })
 	logger := log.New()
 	db := mdbx.NewMDBX(logger).InMem(filepath.Join(path, "db4")).WithTableCfg(func(defaultBuckets kv.TableCfg) kv.TableCfg {
 		return kv.ChaindataTablesCfg
 	}).MustOpen()
 	t.Cleanup(db.Close)
@@ -276,7 +277,7 @@ func TestAggregator_RestartOnFiles(t *testing.T) {
 		}
 	}()
 	agg.SetTx(tx)
-	defer agg.StartWrites().FinishWrites()
+	agg.StartWrites()
 
 	txs := aggStep * 5
 	t.Logf("step=%d tx_count=%d\n", aggStep, txs)
@@ -308,6 +309,7 @@ func TestAggregator_RestartOnFiles(t *testing.T) {
 		err = agg.FinishTx()
 		require.NoError(t, err)
 	}
+	agg.FinishWrites()
 	err = tx.Commit()
 	require.NoError(t, err)
 
@@ -332,16 +334,15 @@ func TestAggregator_RestartOnFiles(t *testing.T) {
 	newAgg, err := NewAggregator(path, path, aggStep, CommitmentModeDirect, commitment.VariantHexPatriciaTrie)
 	require.NoError(t, err)
 	require.NoError(t, newAgg.ReopenFolder())
-	defer newAgg.Close()
 
 	newAgg.SetTx(newTx)
+	newAgg.StartWrites()
 
 	latestTx, err := newAgg.SeekCommitment()
 	require.NoError(t, err)
 	t.Logf("seek to latest_tx=%d", latestTx)
 
-	ctx := newAgg.MakeContext()
-	defer ctx.Close()
+	ctx := newAgg.defaultCtx
 	miss := uint64(0)
 	for i, key := range keys {
 		if uint64(i+1) >= txs-aggStep {
@@ -363,8 +364,23 @@ func TestAggregator_RestartOnFiles(t *testing.T) {
 		require.EqualValues(t, key[0], storedV[0])
 		require.EqualValues(t, key[length.Addr], storedV[1])
 	}
+	newAgg.FinishWrites()
+	ctx.Close()
+	newAgg.Close()
+
+	require.NoError(t, err)
+
+	list, err := os.ReadDir(filepath.Join(path, "e4"))
 	require.NoError(t, err)
+	for i := 0; i < len(list); i++ {
+		if list[i].IsDir() {
+			continue
+		}
+		fmt.Printf("remove %s\n", list[i].Name())
+		err = os.Remove(filepath.Join(path, "e4", list[i].Name()))
+		require.NoError(t, err)
+	}
 }
 
 func TestAggregator_ReplaceCommittedKeys(t *testing.T) {
diff --git a/state/domain_test.go b/state/domain_test.go
index 74d0dbf83..81a2ec385 100644
--- a/state/domain_test.go
+++ b/state/domain_test.go
@@ -38,7 +38,6 @@ import (
 func testDbAndDomain(t *testing.T) (string, kv.RwDB, *Domain) {
 	t.Helper()
 	path := t.TempDir()
-	t.Cleanup(func() { os.RemoveAll(path) })
 	logger := log.New()
 	keysTable := "Keys"
 	valsTable := "Vals"
@@ -69,6 +68,7 @@ func TestCollationBuild(t *testing.T) {
 	defer logEvery.Stop()
 	_, db, d := testDbAndDomain(t)
 	ctx := context.Background()
+	defer d.Close()
 
 	tx, err := db.BeginRw(ctx)
 	require.NoError(t, err)

From bdbc4b786c7c6d2d20f13241d308d0e6d65a4a1c Mon Sep 17 00:00:00 2001
From: awskii
Date: Wed, 1 Mar 2023 16:44:38 +0000
Subject: [PATCH 48/54] fix

---
 state/merge.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/state/merge.go b/state/merge.go
index 5a8446b57..9c448514a 100644
--- a/state/merge.go
+++ b/state/merge.go
@@ -501,6 +501,9 @@ func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, histor
 		if historyIn.index != nil {
 			historyIn.index.Close()
 		}
+		if historyIn.bindex != nil {
+			historyIn.bindex.Close()
+		}
 	}
 	if valuesIn != nil {
 		if valuesIn.decompressor != nil {

From 7f0b351bcf287200ed67ba35c4cb51771473d174 Mon Sep 17 00:00:00 2001
From: awskii
Date: Wed, 1 Mar 2023 17:43:14 +0000
Subject: [PATCH 49/54] fix

---
 state/aggregator.go       | 11 ++++++-----
 state/domain.go           |  4 +++-
 state/domain_committed.go | 13 +++++++++++++
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/state/aggregator.go b/state/aggregator.go
index 513eb2dc8..99e70da19 100644
--- a/state/aggregator.go
+++ b/state/aggregator.go
@@ -434,6 +434,9 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
 	var upmerges int
 	for {
+
+		a.defaultCtx.Close()
+		a.defaultCtx = a.MakeContext()
 		somethingMerged, err := a.mergeLoopStep(ctx, maxEndTxNum, 1)
 		if err != nil {
 			return err
@@ -443,11 +446,8 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
 		}
 		upmerges++
 	}
-
-	if upmerges > 0 {
-		a.defaultCtx.Close()
-		a.defaultCtx = a.MakeContext()
-	}
+	a.defaultCtx.Close()
+	a.defaultCtx = a.MakeContext()
 
 	log.Info("[stat] aggregation merged",
 		"upto_tx", maxEndTxNum,
@@ -1045,6 +1045,7 @@ func (a *Aggregator) StartWrites() *Aggregator {
 		tracesFrom: a.tracesFrom.MakeContext(),
 		tracesTo:   a.tracesTo.MakeContext(),
 	}
+	a.commitment.patriciaTrie.ResetFns(a.defaultCtx.branchFn, a.defaultCtx.accountFn, a.defaultCtx.storageFn)
 	return a
 }
 func (a *Aggregator) FinishWrites() {
diff --git a/state/domain.go b/state/domain.go
index 7584d55bb..eebdbe3ea 100644
--- a/state/domain.go
+++ b/state/domain.go
@@ -354,7 +354,7 @@ func (d *Domain) closeWhatNotInList(fNames []string) {
 	for _, item := range toDelete {
 		if item.decompressor != nil {
 			if err := item.decompressor.Close(); err != nil {
-				log.Trace("close", "err", err, "file", item.index.FileName())
+				log.Trace("close", "err", err, "file", item.decompressor.FileName())
 			}
 			item.decompressor = nil
 		}
@@ -1237,6 +1237,8 @@ func (d *Domain) integrateFiles(sf StaticFiles, txNumFrom, txNumTo uint64) {
 		index:  sf.valuesIdx,
 		bindex: sf.valuesBt,
 	})
+	d.defaultDc.Close()
+	d.defaultDc = d.MakeContext()
 	d.reCalcRoFiles()
 }
diff --git a/state/domain_committed.go b/state/domain_committed.go
index 5dfd6f6b1..7f44fa907 100644
--- a/state/domain_committed.go
+++ b/state/domain_committed.go
@@ -45,6 +45,19 @@ const (
 	CommitmentModeUpdate   CommitmentMode = 2
 )
 
+func (m CommitmentMode) String() string {
+	switch m {
+	case CommitmentModeDisabled:
+		return "disabled"
+	case CommitmentModeDirect:
+		return "direct"
+	case CommitmentModeUpdate:
+		return "update"
+	default:
+		return "unknown"
+	}
+}
+
 type ValueMerger func(prev, current []byte) (merged []byte, err error)
 
 type DomainCommitted struct {

From 65078f703d8367f34d2253983c83106205652f28 Mon Sep 17 00:00:00 2001
From: awskii
Date: Thu, 2 Mar 2023 12:46:15 +0000
Subject: [PATCH 50/54] fix

---
 state/locality_index.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/state/locality_index.go b/state/locality_index.go
index a31e5ecb9..7525ec350 100644
--- a/state/locality_index.go
+++ b/state/locality_index.go
@@ -27,11 +27,12 @@ import (
 	"strconv"
 	"time"
 
+	"github.com/ledgerwatch/log/v3"
+
 	"github.com/ledgerwatch/erigon-lib/common/assert"
 	"github.com/ledgerwatch/erigon-lib/common/dir"
 	"github.com/ledgerwatch/erigon-lib/kv/bitmapdb"
 	"github.com/ledgerwatch/erigon-lib/recsplit"
-	"github.com/ledgerwatch/log/v3"
 )
 
 const LocalityIndexUint64Limit = 64 //bitmap spend 1 bit per file, stored as uint64
@@ -62,17 +63,16 @@ func NewLocalityIndex(
 	return li, nil
 }
 func (li *LocalityIndex) closeWhatNotInList(fNames []string) {
-	if li == nil || li.file == nil || li.file.decompressor == nil {
+	if li == nil || li.bm == nil {
 		return
 	}
 
 	for _, protectName := range fNames {
-		if li.file.decompressor.FileName() == protectName {
-			continue
+		if li.bm.FileName() == protectName {
+			return
 		}
-		li.closeFiles()
-		break
 	}
+	li.closeFiles()
 }
 
 func (li *LocalityIndex) OpenList(fNames []string) error {

From 704944323d03f25b7602e90ab7d6e25157591918 Mon Sep 17 00:00:00 2001
From: awskii
Date: Thu, 2 Mar 2023 13:00:29 +0000
Subject: [PATCH 51/54] fxi

---
 state/aggregator.go | 4 ++--
 state/domain.go     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/state/aggregator.go b/state/aggregator.go
index 99e70da19..e33874f29 100644
--- a/state/aggregator.go
+++ b/state/aggregator.go
@@ -446,8 +446,8 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
 		}
 		upmerges++
 	}
-	a.defaultCtx.Close()
-	a.defaultCtx = a.MakeContext()
+	//a.defaultCtx.Close()
+	//a.defaultCtx = a.MakeContext()
 
 	log.Info("[stat] aggregation merged",
 		"upto_tx", maxEndTxNum,
diff --git a/state/domain.go b/state/domain.go
index eebdbe3ea..8740a9ede 100644
--- a/state/domain.go
+++ b/state/domain.go
@@ -1237,8 +1237,8 @@ func (d *Domain) integrateFiles(sf StaticFiles, txNumFrom, txNumTo uint64) {
 		index:  sf.valuesIdx,
 		bindex: sf.valuesBt,
 	})
-	d.defaultDc.Close()
-	d.defaultDc = d.MakeContext()
+	//d.defaultDc.Close()
+	//d.defaultDc = d.MakeContext()
 	d.reCalcRoFiles()
 }

From 2a62f519618f6eacec0e4767ec7143be49757fa0 Mon Sep 17 00:00:00 2001
From: awskii
Date: Thu, 2 Mar 2023 14:27:03 +0000
Subject: [PATCH 52/54] debug cleanup

---
 go.mod                   |  1 -
 go.sum                   |  4 ----
 state/aggregator.go      | 13 +------------
 state/aggregator_test.go |  3 ---
 state/domain.go          |  6 ------
 state/domain_test.go     |  6 ++++++
 6 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/go.mod b/go.mod
index 68fe6a847..7302fa421 100644
--- a/go.mod
+++ b/go.mod
@@ -95,7 +95,6 @@ require (
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/rs/dnscache v0.0.0-20211102005908-e0241e321417 // indirect
-	github.com/stretchr/objx v0.5.0 // indirect
 	github.com/valyala/fastrand v1.1.0 // indirect
 	github.com/valyala/histogram v1.2.0 // indirect
 	go.etcd.io/bbolt v1.3.6 // indirect
diff --git a/go.sum b/go.sum
index 1ae6a2b0d..f011bf50c 100644
--- a/go.sum
+++ b/go.sum
@@ -356,7 +356,6 @@ github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
-github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
 github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
@@ -367,7 +366,6 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8=
 github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
@@ -413,8 +411,6 @@ golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU
 golang.org/x/crypto v0.6.0 h1:qfktjS5LUO+fFKeJXZ+ikTRijMmljikvG68fpMMruSc=
 golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
-golang.org/x/exp v0.0.0-20230213192124-5e25df0256eb h1:PaBZQdo+iSDyHT053FjUCgZQ/9uqVwPOcl7KSWhKn6w=
-golang.org/x/exp v0.0.0-20230213192124-5e25df0256eb/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
 golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI=
 golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
diff --git a/state/aggregator.go b/state/aggregator.go
index e33874f29..28951ef76 100644
--- a/state/aggregator.go
+++ b/state/aggregator.go
@@ -23,7 +23,6 @@ import (
 	"math"
 	"math/bits"
 	"os"
-	"runtime"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -434,7 +433,6 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
 	var upmerges int
 	for {
-
 		a.defaultCtx.Close()
 		a.defaultCtx = a.MakeContext()
 		somethingMerged, err := a.mergeLoopStep(ctx, maxEndTxNum, 1)
 		if err != nil {
 			return err
@@ -446,8 +444,6 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
 		}
 		upmerges++
 	}
-	//a.defaultCtx.Close()
-	//a.defaultCtx = a.MakeContext()
 
 	log.Info("[stat] aggregation merged",
 		"upto_tx", maxEndTxNum,
@@ -1107,7 +1103,6 @@ func (a *Aggregator) Stats() FilesStats {
 }
 
 type AggregatorContext struct {
-	ix         int64
 	a          *Aggregator
 	accounts   *DomainContext
 	storage    *DomainContext
@@ -1120,12 +1115,9 @@ type AggregatorContext struct {
 	keyBuf     []byte
 }
 
-var aix int64
-
 func (a *Aggregator) MakeContext() *AggregatorContext {
 	ac := &AggregatorContext{
 		a:          a,
-		ix:         atomic.AddInt64(&aix, 1),
 		accounts:   a.accounts.MakeContext(),
 		storage:    a.storage.MakeContext(),
 		code:       a.code.MakeContext(),
@@ -1135,13 +1127,10 @@ func (a *Aggregator) MakeContext() *AggregatorContext {
 		tracesFrom: a.tracesFrom.MakeContext(),
 		tracesTo:   a.tracesTo.MakeContext(),
 	}
-	_, fl, l, _ := runtime.Caller(1)
-	fmt.Printf("AggregatorContext %d created by %s\n", ac.ix, fmt.Sprintf("%s:%d", fl, l))
 	return ac
 }
+
 func (ac *AggregatorContext) Close() {
-	_, fl, l, _ := runtime.Caller(1)
-	fmt.Printf("AggregatorContext %d close by %s\n", ac.ix, fmt.Sprintf("%s:%d", fl, l))
 	ac.accounts.Close()
 	ac.storage.Close()
 	ac.code.Close()
diff --git a/state/aggregator_test.go b/state/aggregator_test.go
index fd508ab5a..0fdedc47a 100644
--- a/state/aggregator_test.go
+++ b/state/aggregator_test.go
@@ -82,7 +82,6 @@ func TestAggregator_WinAccess(t *testing.T) {
 }
 
 func TestAggregator_Merge(t *testing.T) {
-	t.Skip()
 	_, db, agg := testDbAndAggregator(t, 100)
 	defer agg.Close()
 
@@ -155,11 +154,9 @@ func TestAggregator_Merge(t *testing.T) {
 	require.EqualValues(t, maxWrite, binary.BigEndian.Uint64(v[:]))
 
 	v, err = dc.ReadCommitment([]byte("otherroothash"), roTx)
-	require.NoError(t, err)
 	dc.Close()
 	require.EqualValues(t, otherMaxWrite, binary.BigEndian.Uint64(v[:]))
-	time.Sleep(time.Second * 2) // lol let aggregator remove its files first
 }
 
 // here we create a bunch of updates for further aggregation.
diff --git a/state/domain.go b/state/domain.go
index 0f5d7e981..95ed69515 100644
--- a/state/domain.go
+++ b/state/domain.go
@@ -694,16 +694,10 @@ func (dc *DomainContext) Close() {
 		}
 		refCnt := item.src.refcount.Dec()
 		//GC: last reader responsible to remove useles files: close it and delete
-		var fn string
-		if item.src.decompressor != nil {
-			fn = item.src.decompressor.FileName()
-		}
-		fmt.Printf("%s refCnt: %d [%d-%d]\n", fn, refCnt, item.startTxNum, item.endTxNum)
 		if refCnt == 0 && item.src.canDelete.Load() {
 			item.src.closeFilesAndRemove()
 		}
 	}
-	//fmt.Printf("Close: %d %s\n", dc.ix, dc.d.filenameBase)
 	dc.hc.Close()
 }
diff --git a/state/domain_test.go b/state/domain_test.go
index 81a2ec385..3e7003343 100644
--- a/state/domain_test.go
+++ b/state/domain_test.go
@@ -93,6 +93,8 @@ func TestCollationBuild(t *testing.T) {
 	require.NoError(t, err)
 
 	c, err := d.collate(ctx, 0, 0, 7, tx, logEvery)
+	defer c.Close() //nolint:errcheck
+
 	require.NoError(t, err)
 	require.True(t, strings.HasSuffix(c.valuesPath, "base.0-1.kv"))
 	require.Equal(t, 2, c.valuesCount)
@@ -105,6 +107,7 @@
 	sf, err := d.buildFiles(ctx, 0, c)
 	require.NoError(t, err)
 	defer sf.Close()
+
 	g := sf.valuesDecomp.MakeGetter()
 	g.Reset(0)
 	var words []string
@@ -115,7 +118,10 @@
 	require.Equal(t, []string{"key1", "value1.2", "key2", "value2.1"}, words)
 	// Check index
 	require.Equal(t, 2, int(sf.valuesIdx.KeyCount()))
+
+	c.Close()
 	r := recsplit.NewIndexReader(sf.valuesIdx)
+	defer r.Close()
 	for i := 0; i < len(words); i += 2 {
 		offset := r.Lookup([]byte(words[i]))
 		g.Reset(offset)

From 785077dab32ef6fa20b081166ec546ab8b9d5380 Mon Sep 17 00:00:00 2001
From: awskii
Date: Thu, 2 Mar 2023 14:31:27 +0000
Subject: [PATCH 53/54] lint

---
 state/aggregator_test.go | 1 +
 state/domain_test.go     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/state/aggregator_test.go b/state/aggregator_test.go
index 0fdedc47a..8ad69f3f7 100644
--- a/state/aggregator_test.go
+++ b/state/aggregator_test.go
@@ -154,6 +154,7 @@ func TestAggregator_Merge(t *testing.T) {
 	require.EqualValues(t, maxWrite, binary.BigEndian.Uint64(v[:]))
 
 	v, err = dc.ReadCommitment([]byte("otherroothash"), roTx)
+	require.NoError(t, err)
 	dc.Close()
 	require.EqualValues(t, otherMaxWrite, binary.BigEndian.Uint64(v[:]))
 
diff --git a/state/domain_test.go b/state/domain_test.go
index 3e7003343..bd3907721 100644
--- a/state/domain_test.go
+++ b/state/domain_test.go
@@ -93,7 +93,7 @@ func TestCollationBuild(t *testing.T) {
 	require.NoError(t, err)
 
 	c, err := d.collate(ctx, 0, 0, 7, tx, logEvery)
-	defer c.Close() //nolint:errcheck
+	defer c.Close() //nolint
 
 	require.NoError(t, err)
 	require.True(t, strings.HasSuffix(c.valuesPath, "base.0-1.kv"))

From f05b68af12c6ab6001a8454231cf282812f8410f Mon Sep 17 00:00:00 2001
From: awskii
Date: Thu, 2 Mar 2023 14:46:44 +0000
Subject: [PATCH 54/54] fix intermediate file close

---
 state/aggregator_test.go | 15 +--------------
 state/domain.go          |  3 +++
 state/domain_test.go     |  3 +--
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/state/aggregator_test.go b/state/aggregator_test.go
index 8ad69f3f7..4bba5c18a 100644
--- a/state/aggregator_test.go
+++ b/state/aggregator_test.go
@@ -216,10 +216,9 @@ func TestAggregator_RestartOnDatadir(t *testing.T) {
 		require.NoError(t, agg.FinishTx())
 	}
 
-	//err = agg.Flush(context.Background())
 	agg.FinishWrites()
 	agg.Close()
-	//require.NoError(t, err)
+
 	err = tx.Commit()
 	require.NoError(t, err)
 	tx = nil
@@ -367,18 +366,6 @@ func TestAggregator_RestartOnFiles(t *testing.T) {
 	newAgg.Close()
 
 	require.NoError(t, err)
-
-	list, err := os.ReadDir(filepath.Join(path, "e4"))
 	require.NoError(t, err)
-
-	for i := 0; i < len(list); i++ {
-		if list[i].IsDir() {
-			continue
-		}
-		fmt.Printf("remove %s\n", list[i].Name())
-		err = os.Remove(filepath.Join(path, "e4", list[i].Name()))
-		require.NoError(t, err)
-	}
 }
 
 func TestAggregator_ReplaceCommittedKeys(t *testing.T) {
diff --git a/state/domain.go b/state/domain.go
index 95ed69515..7a26907d8 100644
--- a/state/domain.go
+++ b/state/domain.go
@@ -1052,6 +1052,9 @@ func (sf StaticFiles) Close() {
 	if sf.valuesIdx != nil {
 		sf.valuesIdx.Close()
 	}
+	if sf.valuesBt != nil {
+		sf.valuesBt.Close()
+	}
 	if sf.historyDecomp != nil {
 		sf.historyDecomp.Close()
 	}
diff --git a/state/domain_test.go b/state/domain_test.go
index bd3907721..e14c34fc2 100644
--- a/state/domain_test.go
+++ b/state/domain_test.go
@@ -93,7 +93,6 @@ func TestCollationBuild(t *testing.T) {
 	require.NoError(t, err)
 
 	c, err := d.collate(ctx, 0, 0, 7, tx, logEvery)
-	defer c.Close() //nolint
 	require.NoError(t, err)
 	require.True(t, strings.HasSuffix(c.valuesPath, "base.0-1.kv"))
 
@@ -107,6 +106,7 @@
 	sf, err := d.buildFiles(ctx, 0, c)
 	require.NoError(t, err)
 	defer sf.Close()
+	c.Close()
 
 	g := sf.valuesDecomp.MakeGetter()
 	g.Reset(0)
@@ -119,7 +119,6 @@
 	// Check index
 	require.Equal(t, 2, int(sf.valuesIdx.KeyCount()))
 
-	c.Close()
 	r := recsplit.NewIndexReader(sf.valuesIdx)
 	defer r.Close()
 	for i := 0; i < len(words); i += 2 {