From ffd7a11618fbdb665350c19e4d37d36b1384152f Mon Sep 17 00:00:00 2001 From: Nicholas Wiersma Date: Wed, 24 Jan 2024 18:11:25 +0200 Subject: [PATCH] feat: optimise reading ints and longs --- reader.go | 86 ++++++++++++++++++++++++++++++++++++-------------- reader_skip.go | 20 ++++-------- reader_test.go | 56 +++++++++++++++++++------------- 3 files changed, 103 insertions(+), 59 deletions(-) diff --git a/reader.go b/reader.go index 96b80759..0883f024 100644 --- a/reader.go +++ b/reader.go @@ -139,46 +139,86 @@ func (r *Reader) ReadBool() bool { // ReadInt reads an Int from the Reader. func (r *Reader) ReadInt() int32 { - var val uint32 - var offset int8 + if r.Error != nil { + return 0 + } + + var ( + n int + v uint32 + s uint8 + ) + + for { + tail := r.tail + if r.tail-r.head+n > maxIntBufSize { + tail = r.head + maxIntBufSize - n + } - for r.Error == nil { - if offset == maxIntBufSize { + // Consume what is in the buffer. + for i, b := range r.buf[r.head:tail] { + v |= uint32(b&0x7f) << s + if b&0x80 == 0 { + r.head += i + 1 + return int32((v >> 1) ^ -(v & 1)) + } + s += 7 + n++ + } + if n >= maxIntBufSize { r.ReportError("ReadInt", "int overflow") return 0 } + r.head += n - b := r.readByte() - val |= uint32(b&0x7F) << uint(7*offset) - if b&0x80 == 0 { - break + // We ran out of buffer and are not at the end of the int, + // read more into the buffer. + if !r.loadMore() { + return 0 } - offset++ } - - return int32((val >> 1) ^ -(val & 1)) } // ReadLong reads a Long from the Reader. func (r *Reader) ReadLong() int64 { - var val uint64 - var offset int8 + if r.Error != nil { + return 0 + } + + var ( + n int + v uint64 + s uint8 + ) + + for { + tail := r.tail + if r.tail-r.head+n > maxLongBufSize { + tail = r.head + maxLongBufSize - n + } - for r.Error == nil { - if offset == maxLongBufSize { - r.ReportError("ReadLong", "long overflow") + // Consume what is in the buffer. 
+ for i, b := range r.buf[r.head:tail] { + v |= uint64(b&0x7f) << s + if b&0x80 == 0 { + r.head += i + 1 + return int64((v >> 1) ^ -(v & 1)) + } + s += 7 + n++ + } + if n >= maxLongBufSize { + r.ReportError("ReadLong", "int overflow") return 0 } + r.head += n - b := r.readByte() - val |= uint64(b&0x7F) << uint(7*offset) - if b&0x80 == 0 { - break + // We ran out of buffer and are not at the end of the long, + // Read more into the buffer. + if !r.loadMore() { + return 0 } - offset++ } - - return int64((val >> 1) ^ -(val & 1)) } // ReadFloat reads a Float from the Reader. diff --git a/reader_skip.go b/reader_skip.go index bac94c44..94288c8c 100644 --- a/reader_skip.go +++ b/reader_skip.go @@ -28,33 +28,25 @@ func (r *Reader) SkipBool() { // SkipInt skips an Int in the reader. func (r *Reader) SkipInt() { - var offset int8 - for r.Error == nil { - if offset == maxIntBufSize { - return - } - + var n int + for r.Error == nil && n < maxIntBufSize { b := r.readByte() if b&0x80 == 0 { break } - offset++ + n++ } } // SkipLong skips a Long in the reader. 
func (r *Reader) SkipLong() { - var offset int8 - for r.Error == nil { - if offset == maxLongBufSize { - return - } - + var n int + for r.Error == nil && n < maxLongBufSize { b := r.readByte() if b&0x80 == 0 { break } - offset++ + n++ } } diff --git a/reader_test.go b/reader_test.go index f0835f20..ec8ef4e0 100644 --- a/reader_test.go +++ b/reader_test.go @@ -159,7 +159,7 @@ func TestReader_ReadInt(t *testing.T) { }, { name: "negative int", - data: []byte{0x0F}, + data: []byte{0x0f}, want: -8, wantErr: require.NoError, }, @@ -183,7 +183,7 @@ func TestReader_ReadInt(t *testing.T) { }, { name: "negative 64", - data: []byte{0x7F}, + data: []byte{0x7f}, want: -64, wantErr: require.NoError, }, @@ -195,34 +195,32 @@ func TestReader_ReadInt(t *testing.T) { }, { name: "large int", - data: []byte{0xAA, 0xB4, 0xDE, 0x75}, + data: []byte{0xaa, 0xb4, 0xde, 0x75}, want: 123456789, wantErr: require.NoError, }, { name: "larger int", - data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0x07}, + data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0x07}, want: 987654321, wantErr: require.NoError, }, { name: "overflow", - data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0xAD, 0xAD}, + data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad}, want: 0, wantErr: require.Error, }, { name: "eof", - data: []byte{0xE2}, - want: 49, + data: []byte{0xe2}, + want: 0, wantErr: require.Error, }, } for _, test := range tests { - test := test t.Run(test.name, func(t *testing.T) { - r := avro.NewReader(bytes.NewReader(test.data), 10) got := r.ReadInt() @@ -235,85 +233,99 @@ func TestReader_ReadInt(t *testing.T) { func TestReader_ReadLong(t *testing.T) { tests := []struct { + name string data []byte want int64 wantErr require.ErrorAssertionFunc }{ { + name: "long", data: []byte{0x36}, want: 27, wantErr: require.NoError, }, { - data: []byte{0x0F}, + name: "negative long", + data: []byte{0x0f}, want: -8, wantErr: require.NoError, }, { + name: "negative long", data: []byte{0x01}, want: -1, wantErr: require.NoError, }, { + name: "zero", data: 
[]byte{0x00}, want: 0, wantErr: require.NoError, }, { + name: "one", data: []byte{0x02}, want: 1, wantErr: require.NoError, }, { - data: []byte{0x7F}, + name: "negative 64", + data: []byte{0x7f}, want: -64, wantErr: require.NoError, }, { + name: "multi-byte", data: []byte{0x80, 0x01}, want: 64, wantErr: require.NoError, }, { - data: []byte{0xAA, 0xB4, 0xDE, 0x75}, + name: "large long", + data: []byte{0xaa, 0xb4, 0xde, 0x75}, want: 123456789, wantErr: require.NoError, }, { - data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0x07}, + name: "larger long", + data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0x07}, want: 987654321, wantErr: require.NoError, }, { - data: []byte{0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01}, + name: "very very big long", + data: []byte{0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, want: 9223372036854775807, wantErr: require.NoError, }, { - data: []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01}, + name: "very very big negative long", + data: []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, want: -9223372036854775808, wantErr: require.NoError, }, { - data: []byte{0xBD, 0xB1, 0xAE, 0xD4, 0xD2, 0xCD, 0xBD, 0xE4, 0x97, 0x01}, + name: "very very big negative long", + data: []byte{0xbd, 0xb1, 0xae, 0xd4, 0xd2, 0xcd, 0xbd, 0xe4, 0x97, 0x01}, want: -5468631321897454687, wantErr: require.NoError, }, { - data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0xAD, 0xAD, 0xE2, 0xA2, 0xF3, 0xAD, 0xAD}, // Overflow + name: "overflow", + data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad, 0xe2, 0xa2, 0xf3, 0xad, 0xad}, want: 0, wantErr: require.Error, }, { - data: []byte{0xE2}, // io.EOF - want: 49, + name: "eof", + data: []byte{0xe2}, + want: 0, wantErr: require.Error, }, } - for i, test := range tests { - test := test - t.Run(strconv.Itoa(i), func(t *testing.T) { + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { r := avro.NewReader(bytes.NewReader(test.data), 10) got := r.ReadLong()