Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(orm)!: ordered variable length encoding for uint32 and uint64 types #11090

Merged
merged 9 commits into from
Feb 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions orm/encoding/ormfield/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,14 @@ func GetCodec(field protoreflect.FieldDescriptor, nonTerminal bool) (Codec, erro
} else {
return StringCodec{}, nil
}
case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
return Uint32Codec{}, nil
case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
return Uint64Codec{}, nil
case protoreflect.Uint32Kind:
return CompactUint32Codec{}, nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it easy to make the encoding of N<2**32-1 numbers the same in both CompactUint{32,64}Codec? I feel like people will be confused if the same number is encoded in 2 different compact ways depending if it's uint32 or uint64.

if we keep 2,4,6,9 for uint64, then only 2,4 modes will be valid for CompactUint32Codec. It will be less space-performant for sure, but maybe worth the mental overhead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm... these aren't really ever intended to be used by people. it's sort of like a database internal detail which you should only know if you're making a custom implementation. users should just need to know that the database made a performant and correct choice and have some guidance on when to use which data type to get which performance characteristics

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's only database-internal. Storage keys relate to proofs, so IBC, UI client libs, light clients etc. Those developers will be re-implementing this, at least once per language.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

true... but not sure it's worth the compromise. it will mean large uint32's will need 6 bytes...

case protoreflect.Fixed32Kind:
return FixedUint32Codec{}, nil
case protoreflect.Uint64Kind:
return CompactUint64Codec{}, nil
case protoreflect.Fixed64Kind:
return FixedUint64Codec{}, nil
case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
return Int32Codec{}, nil
case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
Expand Down
94 changes: 94 additions & 0 deletions orm/encoding/ormfield/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,97 @@ func TestNTBytesTooLong(t *testing.T) {
_, err = cdc.ComputeBufferSize(bz)
assert.ErrorContains(t, err, ormerrors.BytesFieldTooLong.Error())
}

// TestCompactUInt32 checks the compact uint32 encoding in two ways: a table
// of boundary values (including the smallest and largest uint32) that pins
// the encoded length and verifies round-tripping and strictly increasing
// byte order, and a randomized property test that compares the byte order of
// two encodings against the numeric order of their inputs.
func TestCompactUInt32(t *testing.T) {
	var lastBz []byte
	// testEncodeDecode must be called with strictly increasing x values,
	// because each encoding is compared against the previous one.
	testEncodeDecode := func(x uint32, expectedLen int) {
		bz := ormfield.EncodeCompactUint32(x)
		assert.Equal(t, expectedLen, len(bz))
		y, err := ormfield.DecodeCompactUint32(bytes.NewReader(bz))
		assert.NilError(t, err)
		assert.Equal(t, x, y)
		assert.Assert(t, bytes.Compare(lastBz, bz) < 0)
		lastBz = bz
	}

	testEncodeDecode(0, 2) // smallest value
	testEncodeDecode(64, 2)
	testEncodeDecode(16383, 2)
	testEncodeDecode(16384, 3)
	testEncodeDecode(4194303, 3)
	testEncodeDecode(4194304, 4)
	testEncodeDecode(1073741823, 4)
	testEncodeDecode(1073741824, 5)
	testEncodeDecode(4294967295, 5) // largest uint32; exercises the low 2 bits in the last byte

	// randomized tests
	rapid.Check(t, func(t *rapid.T) {
		x := rapid.Uint32().Draw(t, "x").(uint32)
		y := rapid.Uint32().Draw(t, "y").(uint32)

		bx := ormfield.EncodeCompactUint32(x)
		by := ormfield.EncodeCompactUint32(y)

		// byte-wise order of the encodings must match numeric order
		cmp := bytes.Compare(bx, by)
		switch {
		case x < y:
			assert.Equal(t, -1, cmp)
		case x == y:
			assert.Equal(t, 0, cmp)
		default:
			assert.Equal(t, 1, cmp)
		}

		x2, err := ormfield.DecodeCompactUint32(bytes.NewReader(bx))
		assert.NilError(t, err)
		assert.Equal(t, x, x2)
		y2, err := ormfield.DecodeCompactUint32(bytes.NewReader(by))
		assert.NilError(t, err)
		assert.Equal(t, y, y2)
	})
}

// TestCompactUInt64 exercises the compact uint64 encoding. A table of
// increasing boundary values pins the encoded length of each value, checks
// that it round-trips, and checks that each encoding sorts strictly after the
// previous one. A randomized property test then compares the byte order of
// two encodings against the numeric order of their inputs.
func TestCompactUInt64(t *testing.T) {
	// boundaries must be listed in strictly increasing order of value.
	boundaries := []struct {
		value uint64
		size  int
	}{
		{64, 2},
		{16383, 2},
		{16384, 4},
		{4194303, 4},
		{4194304, 4},
		{1073741823, 4},
		{1073741824, 6},
		{70368744177663, 6},
		{70368744177664, 9},
	}

	var prev []byte
	for _, tc := range boundaries {
		enc := ormfield.EncodeCompactUint64(tc.value)
		assert.Equal(t, tc.size, len(enc))
		dec, err := ormfield.DecodeCompactUint64(bytes.NewReader(enc))
		assert.NilError(t, err)
		assert.Equal(t, tc.value, dec)
		assert.Assert(t, bytes.Compare(prev, enc) < 0)
		prev = enc
	}

	// randomized tests
	rapid.Check(t, func(t *rapid.T) {
		x := rapid.Uint64().Draw(t, "x").(uint64)
		y := rapid.Uint64().Draw(t, "y").(uint64)

		bx := ormfield.EncodeCompactUint64(x)
		by := ormfield.EncodeCompactUint64(y)

		// expected result of bytes.Compare given the numeric order of x and y
		want := 1
		switch {
		case x < y:
			want = -1
		case x == y:
			want = 0
		}
		assert.Equal(t, want, bytes.Compare(bx, by))

		x2, err := ormfield.DecodeCompactUint64(bytes.NewReader(bx))
		assert.NilError(t, err)
		assert.Equal(t, x, x2)
		y2, err := ormfield.DecodeCompactUint64(bytes.NewReader(by))
		assert.NilError(t, err)
		assert.Equal(t, y, y2)
	})
}
159 changes: 151 additions & 8 deletions orm/encoding/ormfield/uint32.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,179 @@ package ormfield

import (
"encoding/binary"
"fmt"
"io"

"google.golang.org/protobuf/reflect/protoreflect"
)

// Uint32Codec encodes uint32 values as 4-byte big-endian integers.
type Uint32Codec struct{}
// FixedUint32Codec encodes uint32 values as 4-byte big-endian integers.
type FixedUint32Codec struct{}

func (u Uint32Codec) FixedBufferSize() int {
func (u FixedUint32Codec) FixedBufferSize() int {
return 4
}

func (u Uint32Codec) ComputeBufferSize(protoreflect.Value) (int, error) {
func (u FixedUint32Codec) ComputeBufferSize(protoreflect.Value) (int, error) {
return u.FixedBufferSize(), nil
}

func (u Uint32Codec) IsOrdered() bool {
func (u FixedUint32Codec) IsOrdered() bool {
return true
}

func (u Uint32Codec) Compare(v1, v2 protoreflect.Value) int {
func (u FixedUint32Codec) Compare(v1, v2 protoreflect.Value) int {
return compareUint(v1, v2)
}

func (u Uint32Codec) Decode(r Reader) (protoreflect.Value, error) {
func (u FixedUint32Codec) Decode(r Reader) (protoreflect.Value, error) {
var x uint32
err := binary.Read(r, binary.BigEndian, &x)
return protoreflect.ValueOfUint32(x), err
}

func (u Uint32Codec) Encode(value protoreflect.Value, w io.Writer) error {
func (u FixedUint32Codec) Encode(value protoreflect.Value, w io.Writer) error {
return binary.Write(w, binary.BigEndian, uint32(value.Uint()))
}

// CompactUint32Codec encodes uint32 values using EncodeCompactUint32.
type CompactUint32Codec struct{}

func (c CompactUint32Codec) Decode(r Reader) (protoreflect.Value, error) {
x, err := DecodeCompactUint32(r)
return protoreflect.ValueOfUint32(x), err
}

func (c CompactUint32Codec) Encode(value protoreflect.Value, w io.Writer) error {
_, err := w.Write(EncodeCompactUint32(uint32(value.Uint())))
return err
}

func (c CompactUint32Codec) Compare(v1, v2 protoreflect.Value) int {
return compareUint(v1, v2)
}

func (c CompactUint32Codec) IsOrdered() bool {
return true
}

func (c CompactUint32Codec) FixedBufferSize() int {
return 5
}

func (c CompactUint32Codec) ComputeBufferSize(protoreflect.Value) (int, error) {
return c.FixedBufferSize(), nil
}

// EncodeCompactUint32 encodes x in 2, 3, 4 or 5 bytes.
//
// The top two bits of the first byte hold the output length minus 2
// (00 = 2 bytes, 01 = 3, 10 = 4, 11 = 5); the remaining bits carry the value
// in big-endian order. Unlike regular varints, this layout is intended to be
// suitable for ordered prefix scans.
//
// Values below 2^14 fit in 2 bytes, values below 2^22 fit in 3, values below
// 2^30 fit in 4, and everything else takes 5. In the 5-byte form the final
// byte holds only the two lowest bits of x.
func EncodeCompactUint32(x uint32) []byte {
	const (
		twoByteLimit   = 1 << 14
		threeByteLimit = 1 << 22
		fourByteLimit  = 1 << 30
	)

	if x < twoByteLimit {
		// x>>8 is below 0x40 here, so the length bits are already 00.
		return []byte{byte(x >> 8), byte(x)}
	}
	if x < threeByteLimit {
		return []byte{0x40 | byte(x>>16), byte(x >> 8), byte(x)}
	}
	if x < fourByteLimit {
		return []byte{0x80 | byte(x>>24), byte(x >> 16), byte(x >> 8), byte(x)}
	}
	// Only 6 payload bits are left in the first byte, so the 32-bit value
	// spills into a fifth byte that carries the lowest 2 bits.
	return []byte{
		0xC0 | byte(x>>26),
		byte(x >> 18),
		byte(x >> 10),
		byte(x >> 2),
		byte(x) & 0x3,
	}
}

// DecodeCompactUint32 decodes a uint32 encoded with EncodeCompactUint32.
//
// It reads one prefix byte from reader, uses the top two bits of that byte to
// determine how many more bytes follow (1, 2, 3 or 4), reads exactly that
// many, and reassembles the value.
//
// All reads go through io.ReadFull so that readers which deliver data in
// small pieces, or which return the final bytes together with an error, are
// handled as the io.Reader contract allows. Errors from io.ReadFull are
// returned unchanged: io.EOF when a read obtained no bytes at all, and
// io.ErrUnexpectedEOF when the input ended partway through a read.
func DecodeCompactUint32(reader io.Reader) (uint32, error) {
	var buf [5]byte

	if _, err := io.ReadFull(reader, buf[:1]); err != nil {
		return 0, err
	}

	switch buf[0] >> 6 {
	case 0:
		// 2-byte form: 14 payload bits; the length bits are 00, so no mask
		// is needed on the first byte.
		if _, err := io.ReadFull(reader, buf[1:2]); err != nil {
			return 0, err
		}

		x := uint32(buf[0]) << 8
		x |= uint32(buf[1])
		return x, nil
	case 1:
		// 3-byte form: 22 payload bits.
		if _, err := io.ReadFull(reader, buf[1:3]); err != nil {
			return 0, err
		}

		x := (uint32(buf[0]) & 0x3F) << 16
		x |= uint32(buf[1]) << 8
		x |= uint32(buf[2])
		return x, nil
	case 2:
		// 4-byte form: 30 payload bits.
		if _, err := io.ReadFull(reader, buf[1:4]); err != nil {
			return 0, err
		}

		x := (uint32(buf[0]) & 0x3F) << 24
		x |= uint32(buf[1]) << 16
		x |= uint32(buf[2]) << 8
		x |= uint32(buf[3])
		return x, nil
	case 3:
		// 5-byte form: the first four bytes carry the upper 30 bits and the
		// last byte carries the lowest 2 bits.
		if _, err := io.ReadFull(reader, buf[1:5]); err != nil {
			return 0, err
		}

		x := (uint32(buf[0]) & 0x3F) << 26
		x |= uint32(buf[1]) << 18
		x |= uint32(buf[2]) << 10
		x |= uint32(buf[3]) << 2
		x |= uint32(buf[4])
		return x, nil
	default:
		// A byte shifted right by 6 is always in 0..3, so this branch is not
		// expected to run; it is kept as a guard.
		return 0, fmt.Errorf("unexpected case")
	}
}
Loading