diff options
| author | Robert Griesemer <gri@golang.org> | 2011-09-28 22:36:52 -0700 |
|---|---|---|
| committer | Robert Griesemer <gri@golang.org> | 2011-09-28 22:36:52 -0700 |
| commit | f30719dc89c2a41502fa584b790943170ad2d1ce (patch) | |
| tree | dfe3b4fdcbe4ebe1c234d3abfa5b050ee39a6093 /src/pkg/encoding | |
| parent | b74136984dc04d3f0025f3eaf55c0ecdf3c38857 (diff) | |
| download | go-f30719dc89c2a41502fa584b790943170ad2d1ce.tar.xz | |
encoding/binary: support for varint encoding
R=rsc, r, nigeltao, r, dsymonds
CC=golang-dev
https://golang.org/cl/5146048
Diffstat (limited to 'src/pkg/encoding')
| -rw-r--r-- | src/pkg/encoding/binary/Makefile | 1 | ||||
| -rw-r--r-- | src/pkg/encoding/binary/binary.go | 6 | ||||
| -rw-r--r-- | src/pkg/encoding/binary/varint.go | 163 | ||||
| -rw-r--r-- | src/pkg/encoding/binary/varint_test.go | 175 |
4 files changed, 342 insertions, 3 deletions
diff --git a/src/pkg/encoding/binary/Makefile b/src/pkg/encoding/binary/Makefile index dc46abe909..3246f5a387 100644 --- a/src/pkg/encoding/binary/Makefile +++ b/src/pkg/encoding/binary/Makefile @@ -7,5 +7,6 @@ include ../../../Make.inc TARG=encoding/binary GOFILES=\ binary.go\ + varint.go\ include ../../../Make.pkg diff --git a/src/pkg/encoding/binary/binary.go b/src/pkg/encoding/binary/binary.go index 8e55cb23b7..c58f73694b 100644 --- a/src/pkg/encoding/binary/binary.go +++ b/src/pkg/encoding/binary/binary.go @@ -17,9 +17,9 @@ import ( // A ByteOrder specifies how to convert byte sequences into // 16-, 32-, or 64-bit unsigned integers. type ByteOrder interface { - Uint16(b []byte) uint16 - Uint32(b []byte) uint32 - Uint64(b []byte) uint64 + Uint16([]byte) uint16 + Uint32([]byte) uint32 + Uint64([]byte) uint64 PutUint16([]byte, uint16) PutUint32([]byte, uint32) PutUint64([]byte, uint64) diff --git a/src/pkg/encoding/binary/varint.go b/src/pkg/encoding/binary/varint.go new file mode 100644 index 0000000000..1439dd3faa --- /dev/null +++ b/src/pkg/encoding/binary/varint.go @@ -0,0 +1,163 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +// This file implements "varint" encoding of 64-bit integers. +// The encoding is: +// - unsigned integers are serialized 7 bits at a time, starting with the +// least significant bits +// - the most significant bit (msb) in each output byte indicates if there +// is a continuation byte (msb = 1) +// - signed integers are mapped to unsigned integers using "zig-zag" +// encoding: Positive values x are written as 2*x + 0, negative values +// are written as 2*(^x) + 1; that is, negative numbers are complemented +// and whether to complement is encoded in bit 0. +// +// Design note: +// At most 10 bytes are needed for 64-bit values. The encoding could +// be more dense: a full 64-bit value needs an extra byte just to hold bit 63. +// Instead, the msb of the previous byte could be used to hold bit 63 since we +// know there can't be more than 64 bits. This is a trivial improvement and +// would reduce the maximum encoding length to 9 bytes. However, it breaks the +// invariant that the msb is always the "continuation bit" and thus makes the +// format incompatible with a varint encoding for larger numbers (say 128-bit). + +import ( + "io" + "os" +) + +// MaxVarintLenN is the maximum length of a varint-encoded N-bit integer. +const ( + MaxVarintLen16 = 3 + MaxVarintLen32 = 5 + MaxVarintLen64 = 10 +) + +// PutUvarint encodes a uint64 into buf and returns the number of bytes written. +// If the buffer is too small, the result is the negated number of bytes required +// (that is, -PutUvarint(nil, x) is the number of bytes required to encode x). +func PutUvarint(buf []byte, x uint64) int { + var i int + for i = range buf { + if x < 0x80 { + buf[i] = byte(x) + return i + 1 + } + buf[i] = byte(x) | 0x80 + x >>= 7 + } + // buffer too small; compute number of bytes required + for x >= 0x4000 { + x >>= 2 * 7 + i += 2 + } + if x >= 0x80 { + i++ + } + return -(i + 1) +} + +// Uvarint decodes a uint64 from buf and returns that value and the +// number of bytes read (> 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +// +func Uvarint(buf []byte) (uint64, int) { + var x uint64 + var s uint + for i, b := range buf { + if b < 0x80 { + if i > 9 || i == 9 && b > 1 { + return 0, -(i + 1) // overflow + } + return x | uint64(b)<<s, i + 1 + } + x |= uint64(b&0x7f) << s + s += 7 + } + return 0, 0 +} + +// PutVarint encodes an int64 into buf and returns the number of bytes written. +// If the buffer is too small, the result is the negated number of bytes required +// (that is, -PutVarint(nil, x) is the number of bytes required to encode x). +func PutVarint(buf []byte, x int64) int { + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + return PutUvarint(buf, ux) +} + +// Varint decodes an int64 from buf and returns that value and the +// number of bytes read (> 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 with the following meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +// +func Varint(buf []byte) (int64, int) { + ux, n := Uvarint(buf) // ok to continue in presence of error + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x, n +} + +// WriteUvarint encodes x and writes the result to w. +func WriteUvarint(w io.Writer, x uint64) os.Error { + var buf [MaxVarintLen64]byte + n := PutUvarint(buf[:], x) // won't fail + _, err := w.Write(buf[0:n]) + return err +} + +var overflow = os.NewError("binary: varint overflows a 64-bit integer") + +// ReadUvarint reads an encoded unsigned integer from r and returns it as a uint64. +func ReadUvarint(r io.ByteReader) (uint64, os.Error) { + var x uint64 + var s uint + for i := 0; ; i++ { + b, err := r.ReadByte() + if err != nil { + return x, err + } + if b < 0x80 { + if i > 9 || i == 9 && b > 1 { + return x, overflow + } + return x | uint64(b)<<s, nil + } + x |= uint64(b&0x7f) << s + s += 7 + } + panic("unreachable") +} + +// WriteVarint encodes x and writes the result to w. +func WriteVarint(w io.Writer, x int64) os.Error { + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + return WriteUvarint(w, ux) +} + +// ReadVarint reads an encoded unsigned integer from r and returns it as a uint64. +func ReadVarint(r io.ByteReader) (int64, os.Error) { + ux, err := ReadUvarint(r) // ok to continue in presence of error + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x, err +} diff --git a/src/pkg/encoding/binary/varint_test.go b/src/pkg/encoding/binary/varint_test.go new file mode 100644 index 0000000000..a85aceeeac --- /dev/null +++ b/src/pkg/encoding/binary/varint_test.go @@ -0,0 +1,175 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +import ( + "bytes" + "os" + "testing" +) + +func testConstant(t *testing.T, w uint, max int) { + n := -PutUvarint(nil, 1<<w-1) + if n != max { + t.Errorf("MaxVarintLen%d = %d; want %d", w, max, n) + } +} + +func TestConstants(t *testing.T) { + testConstant(t, 16, MaxVarintLen16) + testConstant(t, 32, MaxVarintLen32) + testConstant(t, 64, MaxVarintLen64) +} + +func testVarint(t *testing.T, x int64) { + var buf1 [10]byte + n := PutVarint(buf1[:], x) + y, m := Varint(buf1[0:n]) + if x != y { + t.Errorf("Varint(%d): got %d", x, y) + } + if n != m { + t.Errorf("Varint(%d): got n = %d; want %d", x, m, n) + } + + var buf2 bytes.Buffer + err := WriteVarint(&buf2, x) + if err != nil { + t.Errorf("WriteVarint(%d): %s", x, err) + } + if n != buf2.Len() { + t.Errorf("WriteVarint(%d): got n = %d; want %d", x, buf2.Len(), n) + } + y, err = ReadVarint(&buf2) + if err != nil { + t.Errorf("ReadVarint(%d): %s", x, err) + } + if x != y { + t.Errorf("ReadVarint(%d): got %d", x, y) + } +} + +func testUvarint(t *testing.T, x uint64) { + var buf1 [10]byte + n := PutUvarint(buf1[:], x) + y, m := Uvarint(buf1[0:n]) + if x != y { + t.Errorf("Uvarint(%d): got %d", x, y) + } + if n != m { + t.Errorf("Uvarint(%d): got n = %d; want %d", x, m, n) + } + + var buf2 bytes.Buffer + err := WriteUvarint(&buf2, x) + if err != nil { + t.Errorf("WriteUvarint(%d): %s", x, err) + } + if n != buf2.Len() { + t.Errorf("WriteUvarint(%d): got n = %d; want %d", x, buf2.Len(), n) + } + y, err = ReadUvarint(&buf2) + if err != nil { + t.Errorf("ReadUvarint(%d): %s", x, err) + } + if x != y { + t.Errorf("ReadUvarint(%d): got %d", x, y) + } +} + +var tests = []int64{ + -1 << 63, + -1<<63 + 1, + -1, + 0, + 1, + 2, + 10, + 20, + 63, + 64, + 65, + 127, + 128, + 129, + 255, + 256, + 257, + 1<<63 - 1, +} + +func TestVarint(t *testing.T) { + for _, x := range tests { + testVarint(t, x) + testVarint(t, -x) + } + for x := int64(0x7); x != 0; x <<= 1 { + testVarint(t, x) + testVarint(t, -x) + } +} + +func TestUvarint(t *testing.T) { + for _, x := range tests { + testUvarint(t, uint64(x)) + } + for x := uint64(0x7); x != 0; x <<= 1 { + testUvarint(t, x) + } +} + +func TestBufferTooSmall(t *testing.T) { + for i := 0; i < 10; i++ { + buf := make([]byte, i) + x := uint64(1) << (uint(i) * 7) + n0 := -i + if i == 0 { + n0 = -1 // encoding 0 takes one byte + } + if n := PutUvarint(buf, x); n != n0 { + t.Errorf("PutUvarint([%d]byte, %d): got n = %d; want %d", len(buf), x, n, n0) + } + } + + buf := []byte{0x80, 0x80, 0x80, 0x80} + for i := 0; i <= len(buf); i++ { + buf := buf[0:i] + x, n := Uvarint(buf) + if x != 0 || n != 0 { + t.Errorf("Uvarint(%v): got x = %d, n = %d", buf, x, n) + } + + x, err := ReadUvarint(bytes.NewBuffer(buf)) + if x != 0 || err != os.EOF { + t.Errorf("ReadUvarint(%v): got x = %d, err = %s", buf, x, err) + } + } +} + +func testOverflow(t *testing.T, buf []byte, n0 int, err0 os.Error) { + x, n := Uvarint(buf) + if x != 0 || n != n0 { + t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, %d", buf, x, n, n0) + } + + x, err := ReadUvarint(bytes.NewBuffer(buf)) + if x != 0 || err != err0 { + t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want 0, %s", buf, x, err, err0) + } +} + +func TestOverflow(t *testing.T) { + testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, -10, overflow) + testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, -13, overflow) +} + +func TestNonCanonicalZero(t *testing.T) { + buf := []byte{0x80, 0x80, 0x80, 0} + x, n := Uvarint(buf) + if x != 0 || n != 4 { + t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, 4", buf, x, n) + + } +} |
