Skip to content

Commit

Permalink
feat: optimise reading ints and longs (#348)
Browse files Browse the repository at this point in the history
  • Loading branch information
nrwiersma authored Jan 29, 2024
1 parent aa7f619 commit 832df22
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 59 deletions.
86 changes: 63 additions & 23 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,46 +139,86 @@ func (r *Reader) ReadBool() bool {

// ReadInt reads an Int from the Reader.
func (r *Reader) ReadInt() int32 {
var val uint32
var offset int8
if r.Error != nil {
return 0
}

var (
n int
v uint32
s uint8
)

for {
tail := r.tail
if r.tail-r.head+n > maxIntBufSize {
tail = r.head + maxIntBufSize - n
}

for r.Error == nil {
if offset == maxIntBufSize {
// Consume what is in the buffer.
for i, b := range r.buf[r.head:tail] {
v |= uint32(b&0x7f) << s
if b&0x80 == 0 {
r.head += i + 1
return int32((v >> 1) ^ -(v & 1))
}
s += 7
n++
}
if n >= maxIntBufSize {
r.ReportError("ReadInt", "int overflow")
return 0
}
r.head += n

b := r.readByte()
val |= uint32(b&0x7F) << uint(7*offset)
if b&0x80 == 0 {
break
// We ran out of buffer and are not at the end of the int,
// so read more into the buffer.
if !r.loadMore() {
return 0
}
offset++
}

return int32((val >> 1) ^ -(val & 1))
}

// ReadLong reads a Long from the Reader.
func (r *Reader) ReadLong() int64 {
var val uint64
var offset int8
if r.Error != nil {
return 0
}

var (
n int
v uint64
s uint8
)

for {
tail := r.tail
if r.tail-r.head+n > maxLongBufSize {
tail = r.head + maxLongBufSize - n
}

for r.Error == nil {
if offset == maxLongBufSize {
r.ReportError("ReadLong", "long overflow")
// Consume what is in the buffer.
for i, b := range r.buf[r.head:tail] {
v |= uint64(b&0x7f) << s
if b&0x80 == 0 {
r.head += i + 1
return int64((v >> 1) ^ -(v & 1))
}
s += 7
n++
}
if n >= maxLongBufSize {
r.ReportError("ReadLong", "int overflow")
return 0
}
r.head += n

b := r.readByte()
val |= uint64(b&0x7F) << uint(7*offset)
if b&0x80 == 0 {
break
// We ran out of buffer and are not at the end of the long,
// so read more into the buffer.
if !r.loadMore() {
return 0
}
offset++
}

return int64((val >> 1) ^ -(val & 1))
}

// ReadFloat reads a Float from the Reader.
Expand Down
20 changes: 6 additions & 14 deletions reader_skip.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,25 @@ func (r *Reader) SkipBool() {

// SkipInt skips an Int in the reader.
func (r *Reader) SkipInt() {
var offset int8
for r.Error == nil {
if offset == maxIntBufSize {
return
}

var n int
for r.Error == nil && n < maxIntBufSize {
b := r.readByte()
if b&0x80 == 0 {
break
}
offset++
n++
}
}

// SkipLong skips a Long in the reader.
func (r *Reader) SkipLong() {
var offset int8
for r.Error == nil {
if offset == maxLongBufSize {
return
}

var n int
for r.Error == nil && n < maxLongBufSize {
b := r.readByte()
if b&0x80 == 0 {
break
}
offset++
n++
}
}

Expand Down
56 changes: 34 additions & 22 deletions reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func TestReader_ReadInt(t *testing.T) {
},
{
name: "negative int",
data: []byte{0x0F},
data: []byte{0x0f},
want: -8,
wantErr: require.NoError,
},
Expand All @@ -183,7 +183,7 @@ func TestReader_ReadInt(t *testing.T) {
},
{
name: "negative 64",
data: []byte{0x7F},
data: []byte{0x7f},
want: -64,
wantErr: require.NoError,
},
Expand All @@ -195,34 +195,32 @@ func TestReader_ReadInt(t *testing.T) {
},
{
name: "large int",
data: []byte{0xAA, 0xB4, 0xDE, 0x75},
data: []byte{0xaa, 0xb4, 0xde, 0x75},
want: 123456789,
wantErr: require.NoError,
},
{
name: "larger int",
data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0x07},
data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0x07},
want: 987654321,
wantErr: require.NoError,
},
{
name: "overflow",
data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0xAD, 0xAD},
data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad},
want: 0,
wantErr: require.Error,
},
{
name: "eof",
data: []byte{0xE2},
want: 49,
data: []byte{0xe2},
want: 0,
wantErr: require.Error,
},
}

for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {

r := avro.NewReader(bytes.NewReader(test.data), 10)

got := r.ReadInt()
Expand All @@ -235,85 +233,99 @@ func TestReader_ReadInt(t *testing.T) {

func TestReader_ReadLong(t *testing.T) {
tests := []struct {
name string
data []byte
want int64
wantErr require.ErrorAssertionFunc
}{
{
name: "long",
data: []byte{0x36},
want: 27,
wantErr: require.NoError,
},
{
data: []byte{0x0F},
name: "negative long",
data: []byte{0x0f},
want: -8,
wantErr: require.NoError,
},
{
name: "negative long",
data: []byte{0x01},
want: -1,
wantErr: require.NoError,
},
{
name: "zero",
data: []byte{0x00},
want: 0,
wantErr: require.NoError,
},
{
name: "one",
data: []byte{0x02},
want: 1,
wantErr: require.NoError,
},
{
data: []byte{0x7F},
name: "negative 64",
data: []byte{0x7f},
want: -64,
wantErr: require.NoError,
},
{
name: "multi-byte",
data: []byte{0x80, 0x01},
want: 64,
wantErr: require.NoError,
},
{
data: []byte{0xAA, 0xB4, 0xDE, 0x75},
name: "large long",
data: []byte{0xaa, 0xb4, 0xde, 0x75},
want: 123456789,
wantErr: require.NoError,
},
{
data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0x07},
name: "larger long",
data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0x07},
want: 987654321,
wantErr: require.NoError,
},
{
data: []byte{0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01},
name: "very very big long",
data: []byte{0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01},
want: 9223372036854775807,
wantErr: require.NoError,
},
{
data: []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01},
name: "very very big negative long",
data: []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01},
want: -9223372036854775808,
wantErr: require.NoError,
},
{
data: []byte{0xBD, 0xB1, 0xAE, 0xD4, 0xD2, 0xCD, 0xBD, 0xE4, 0x97, 0x01},
name: "very very big negative long",
data: []byte{0xbd, 0xb1, 0xae, 0xd4, 0xd2, 0xcd, 0xbd, 0xe4, 0x97, 0x01},
want: -5468631321897454687,
wantErr: require.NoError,
},
{
data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0xAD, 0xAD, 0xE2, 0xA2, 0xF3, 0xAD, 0xAD}, // Overflow
name: "overflow",
data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad, 0xe2, 0xa2, 0xf3, 0xad, 0xad},
want: 0,
wantErr: require.Error,
},
{
data: []byte{0xE2}, // io.EOF
want: 49,
name: "eof",
data: []byte{0xe2},
want: 0,
wantErr: require.Error,
},
}

for i, test := range tests {
test := test
t.Run(strconv.Itoa(i), func(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r := avro.NewReader(bytes.NewReader(test.data), 10)

got := r.ReadLong()
Expand Down

0 comments on commit 832df22

Please sign in to comment.